Diffstat (limited to 'target')
79 files changed, 11809 insertions, 5378 deletions
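Much of this series replaces boolean ARM_FEATURE_* flags for crypto, divide, Jazelle and similar ISA features with tests on the ID_ISAR*/ID_AA64ISAR*/ID_AA64PFR0 register fields, which now live in the new ARMISARegisters substructure (cpu->isar) shared with the translators. Below is a minimal, self-contained sketch of that pattern, not QEMU code: extract32(), ISARegs and the feature_*() helpers are illustrative stand-ins, while the field positions (ID_ISAR0.DIVIDE at bits [27:24], ID_ISAR5.AES at bits [7:4]) and the register values are taken from the diff itself.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a bitfield extraction helper. */
static inline uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

/* Small subset of the ID-register substructure added to ARMCPU in this diff. */
typedef struct {
    uint32_t id_isar0;   /* ID_ISAR0.DIVIDE is bits [27:24] */
    uint32_t id_isar5;   /* ID_ISAR5.AES is bits [7:4] */
} ISARegs;

static inline bool feature_thumb_div(const ISARegs *id)
{
    /* DIVIDE >= 1: SDIV/UDIV available in the Thumb encodings */
    return extract32(id->id_isar0, 24, 4) != 0;
}

static inline bool feature_arm_div(const ISARegs *id)
{
    /* DIVIDE >= 2: SDIV/UDIV also available in the A32 encodings */
    return extract32(id->id_isar0, 24, 4) > 1;
}

static inline bool feature_aes(const ISARegs *id)
{
    /* AES >= 1: AESE/AESD/AESMC/AESIMC; AES == 2 additionally PMULL */
    return extract32(id->id_isar5, 4, 4) != 0;
}

int main(void)
{
    /* ID register values as set for the Cortex-A57 in this diff */
    ISARegs a57 = { .id_isar0 = 0x02101110, .id_isar5 = 0x00011121 };

    assert(feature_thumb_div(&a57));
    assert(feature_arm_div(&a57));      /* DIVIDE field is 2 */
    printf("AES insns advertised: %s\n", feature_aes(&a57) ? "yes" : "no");
    return 0;
}

The point of driving the translators from the same ID-register values the guest reads is that advertising a feature and enabling it can no longer go out of sync, as the assert(cpu_isar_feature(arm_div, cpu)) added in arm_cpu_realizefn() illustrates.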
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index b08078e7fc..a953897fcc 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -201,7 +201,6 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - tlb_flush(cs); env->lock_addr = -1; #if defined(CONFIG_USER_ONLY) diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index ce55eeb682..2b856930fb 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -103,6 +103,16 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, } else { /* Processor is not in secure mode */ target_cpu->env.cp15.scr_el3 |= SCR_NS; + + /* + * If QEMU is providing the equivalent of EL3 firmware, then we need + * to make sure a CPU targeting EL2 comes out of reset with a + * functional HVC insn. + */ + if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3) + && info->target_el == 2) { + target_cpu->env.cp15.scr_el3 |= SCR_HCE; + } } /* We check if the started CPU is now at the correct level */ diff --git a/target/arm/cpu.c b/target/arm/cpu.c index b5e61cc177..8f16e96b6c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -144,9 +144,9 @@ static void arm_cpu_reset(CPUState *s) g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; - env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0; - env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1; - env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2; + env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0; + env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; s->halted = cpu->start_powered_off; @@ -814,7 +814,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) /* Some features automatically imply others: */ if (arm_feature(env, ARM_FEATURE_V8)) { - set_feature(env, ARM_FEATURE_V7VE); + if (arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_V7); + } else { + set_feature(env, ARM_FEATURE_V7VE); + } } if (arm_feature(env, ARM_FEATURE_V7VE)) { /* v7 Virtualization Extensions. In real hardware this implies @@ -825,7 +829,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Presence of EL2 itself is ARM_FEATURE_EL2, and of the * Security Extensions is ARM_FEATURE_EL3. */ - set_feature(env, ARM_FEATURE_ARM_DIV); + assert(cpu_isar_feature(arm_div, cpu)); set_feature(env, ARM_FEATURE_LPAE); set_feature(env, ARM_FEATURE_V7); } @@ -850,20 +854,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (arm_feature(env, ARM_FEATURE_V6)) { set_feature(env, ARM_FEATURE_V5); - set_feature(env, ARM_FEATURE_JAZELLE); if (!arm_feature(env, ARM_FEATURE_M)) { + assert(cpu_isar_feature(jazelle, cpu)); set_feature(env, ARM_FEATURE_AUXCR); } } if (arm_feature(env, ARM_FEATURE_V5)) { set_feature(env, ARM_FEATURE_V4T); } - if (arm_feature(env, ARM_FEATURE_M)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } - if (arm_feature(env, ARM_FEATURE_ARM_DIV)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } if (arm_feature(env, ARM_FEATURE_VFP4)) { set_feature(env, ARM_FEATURE_VFP3); set_feature(env, ARM_FEATURE_VFP_FP16); @@ -938,7 +936,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. 
*/ cpu->id_pfr1 &= ~0xf0; - cpu->id_aa64pfr0 &= ~0xf000; + cpu->isar.id_aa64pfr0 &= ~0xf000; } if (!cpu->has_el2) { @@ -955,7 +953,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers if we don't have EL2. These are id_pfr1[15:12] and * id_aa64pfr0_el1[11:8]. */ - cpu->id_aa64pfr0 &= ~0xf00; + cpu->isar.id_aa64pfr0 &= ~0xf00; cpu->id_pfr1 &= ~0xf000; } @@ -1084,11 +1082,16 @@ static void arm926_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x41069265; cpu->reset_fpsid = 0x41011090; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); } static void arm946_initfn(Object *obj) @@ -1114,12 +1117,18 @@ static void arm1026_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_AUXCR); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x4106a262; cpu->reset_fpsid = 0x410110a0; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; cpu->reset_auxcr = 1; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + { /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ ARMCPRegInfo ifar = { @@ -1151,8 +1160,8 @@ static void arm1136_r2_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4107b362; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1162,11 +1171,11 @@ static void arm1136_r2_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1183,8 +1192,8 @@ static void arm1136_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4117b363; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1194,11 +1203,11 @@ static void arm1136_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1216,8 +1225,8 @@ static void arm1176_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fb767; cpu->reset_fpsid = 
0x410120b5; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1227,11 +1236,11 @@ static void arm1176_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222100; - cpu->id_isar0 = 0x0140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231121; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x01141; + cpu->isar.id_isar0 = 0x0140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231121; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x01141; cpu->reset_auxcr = 7; } @@ -1247,8 +1256,8 @@ static void arm11mpcore_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); cpu->midr = 0x410fb022; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; @@ -1257,11 +1266,11 @@ static void arm11mpcore_initfn(Object *obj) cpu->id_mmfr0 = 0x01100103; cpu->id_mmfr1 = 0x10020302; cpu->id_mmfr2 = 0x01222000; - cpu->id_isar0 = 0x00100011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11221011; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00100011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11221011; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 1; } @@ -1290,13 +1299,13 @@ static void cortex_m3_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m4_initfn(Object *obj) @@ -1317,13 +1326,13 @@ static void cortex_m4_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m33_initfn(Object *obj) @@ -1346,13 +1355,13 @@ static void cortex_m33_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01101110; - cpu->id_isar1 = 0x02212000; - cpu->id_isar2 = 0x20232232; - cpu->id_isar3 = 0x01111131; - cpu->id_isar4 = 0x01310132; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; cpu->clidr = 0x00000000; cpu->ctr = 0x8000c000; } @@ -1384,8 +1393,6 @@ static 
void cortex_r5_initfn(Object *obj) ARMCPU *cpu = ARM_CPU(obj); set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV); - set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); cpu->midr = 0x411fc153; /* r1p3 */ @@ -1397,13 +1404,13 @@ static void cortex_r5_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01200000; cpu->id_mmfr3 = 0x0211; - cpu->id_isar0 = 0x2101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232141; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x0010142; - cpu->id_isar5 = 0x0; - cpu->id_isar6 = 0x0; + cpu->isar.id_isar0 = 0x02101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232141; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x0010142; + cpu->isar.id_isar5 = 0x0; + cpu->isar.id_isar6 = 0x0; cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; define_arm_cp_regs(cpu, cortexr5_cp_reginfo); @@ -1438,8 +1445,8 @@ static void cortex_a8_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fc080; cpu->reset_fpsid = 0x410330c0; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x00011111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1450,11 +1457,11 @@ static void cortex_a8_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01202000; cpu->id_mmfr3 = 0x11; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x12112111; - cpu->id_isar2 = 0x21232031; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x12112111; + cpu->isar.id_isar2 = 0x21232031; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x15141000; cpu->clidr = (1 << 27) | (2 << 24) | 3; cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ @@ -1512,8 +1519,8 @@ static void cortex_a9_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CBAR); cpu->midr = 0x410fc090; cpu->reset_fpsid = 0x41033090; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x01111111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1524,11 +1531,11 @@ static void cortex_a9_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01230000; cpu->id_mmfr3 = 0x00002111; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x35141000; cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. 
*/ @@ -1573,8 +1580,8 @@ static void cortex_a7_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; cpu->midr = 0x410fc075; cpu->reset_fpsid = 0x41023075; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1587,11 +1594,14 @@ static void cortex_a7_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01240000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x01101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but + * table 4-41 gives 0x02101110, which includes the arm div insns. + */ + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f005; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1616,8 +1626,8 @@ static void cortex_a15_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; cpu->midr = 0x412fc0f1; cpu->reset_fpsid = 0x410430f0; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1630,11 +1640,11 @@ static void cortex_a15_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01240000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f021; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1827,17 +1837,26 @@ static void arm_max_initfn(Object *obj) cortex_a15_initfn(obj); #ifdef CONFIG_USER_ONLY /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, + * since we don't correctly set (all of) the ID registers to + * advertise them. 
*/ set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); + { + uint32_t t; + + t = cpu->isar.id_isar5; + t = FIELD_DP32(t, ID_ISAR5, AES, 2); + t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); + t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); + t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); + t = FIELD_DP32(t, ID_ISAR5, RDM, 1); + t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = t; + + t = cpu->isar.id_isar6; + t = FIELD_DP32(t, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = t; + } #endif } } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 65c0fa0a65..8e6779936e 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -56,6 +56,7 @@ #define EXCP_SEMIHOST 16 /* semihosting call */ #define EXCP_NOCP 17 /* v7M NOCP UsageFault */ #define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */ +#define EXCP_STKOF 19 /* v8M STKOF UsageFault */ /* NB: add new EXCP_ defines to the array in arm_log_exception() too */ #define ARMV7M_EXCP_RESET 1 @@ -530,6 +531,13 @@ typedef struct CPUARMState { */ } exception; + /* Information associated with an SError */ + struct { + uint8_t pending; + uint8_t has_esr; + uint64_t esr; + } serror; + /* Thumb-2 EE state. */ uint32_t teecr; uint32_t teehbr; @@ -668,6 +676,8 @@ typedef enum ARMPSCIState { PSCI_ON_PENDING = 2 } ARMPSCIState; +typedef struct ARMISARegisters ARMISARegisters; + /** * ARMCPU: * @env: #CPUARMState @@ -787,13 +797,28 @@ struct ARMCPU { * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix * is used for reset values of non-constant registers; no reset_ * prefix means a constant register. + * Some of these registers are split out into a substructure that + * is shared with the translators to control the ISA. 
*/ + struct ARMISARegisters { + uint32_t id_isar0; + uint32_t id_isar1; + uint32_t id_isar2; + uint32_t id_isar3; + uint32_t id_isar4; + uint32_t id_isar5; + uint32_t id_isar6; + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + } isar; uint32_t midr; uint32_t revidr; uint32_t reset_fpsid; - uint32_t mvfr0; - uint32_t mvfr1; - uint32_t mvfr2; uint32_t ctr; uint32_t reset_sctlr; uint32_t id_pfr0; @@ -807,21 +832,10 @@ struct ARMCPU { uint32_t id_mmfr2; uint32_t id_mmfr3; uint32_t id_mmfr4; - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64afr0; uint64_t id_aa64afr1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint32_t dbgdidr; @@ -910,12 +924,23 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); +void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64); +#else +static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } +static inline void aarch64_sve_change_el(CPUARMState *env, int o, + int n, bool a) +{ } #endif target_ulong do_arm_semihosting(CPUARMState *env); void aarch64_sync_32_to_64(CPUARMState *env); void aarch64_sync_64_to_32(CPUARMState *env); +int fp_exception_el(CPUARMState *env, int cur_el); +int sve_exception_el(CPUARMState *env, int cur_el); +uint32_t sve_zcr_len_for_el(CPUARMState *env, int el); + static inline bool is_a64(CPUARMState *env) { return env->aarch64; @@ -1336,8 +1361,10 @@ FIELD(V7M_CCR, UNALIGN_TRP, 3, 1) FIELD(V7M_CCR, DIV_0_TRP, 4, 1) FIELD(V7M_CCR, BFHFNMIGN, 8, 1) FIELD(V7M_CCR, STKALIGN, 9, 1) +FIELD(V7M_CCR, STKOFHFNMIGN, 10, 1) FIELD(V7M_CCR, DC, 16, 1) FIELD(V7M_CCR, IC, 17, 1) +FIELD(V7M_CCR, BP, 18, 1) /* V7M SCR bits */ FIELD(V7M_SCR, SLEEPONEXIT, 1, 1) @@ -1378,6 +1405,7 @@ FIELD(V7M_CFSR, UNDEFINSTR, 16 + 0, 1) FIELD(V7M_CFSR, INVSTATE, 16 + 1, 1) FIELD(V7M_CFSR, INVPC, 16 + 2, 1) FIELD(V7M_CFSR, NOCP, 16 + 3, 1) +FIELD(V7M_CFSR, STKOF, 16 + 4, 1) FIELD(V7M_CFSR, UNALIGNED, 16 + 8, 1) FIELD(V7M_CFSR, DIVBYZERO, 16 + 9, 1) @@ -1428,6 +1456,104 @@ FIELD(V7M_CSSELR, LEVEL, 1, 3) */ FIELD(V7M_CSSELR, INDEX, 0, 4) +/* + * System register ID fields. 
+ */ +FIELD(ID_ISAR0, SWAP, 0, 4) +FIELD(ID_ISAR0, BITCOUNT, 4, 4) +FIELD(ID_ISAR0, BITFIELD, 8, 4) +FIELD(ID_ISAR0, CMPBRANCH, 12, 4) +FIELD(ID_ISAR0, COPROC, 16, 4) +FIELD(ID_ISAR0, DEBUG, 20, 4) +FIELD(ID_ISAR0, DIVIDE, 24, 4) + +FIELD(ID_ISAR1, ENDIAN, 0, 4) +FIELD(ID_ISAR1, EXCEPT, 4, 4) +FIELD(ID_ISAR1, EXCEPT_AR, 8, 4) +FIELD(ID_ISAR1, EXTEND, 12, 4) +FIELD(ID_ISAR1, IFTHEN, 16, 4) +FIELD(ID_ISAR1, IMMEDIATE, 20, 4) +FIELD(ID_ISAR1, INTERWORK, 24, 4) +FIELD(ID_ISAR1, JAZELLE, 28, 4) + +FIELD(ID_ISAR2, LOADSTORE, 0, 4) +FIELD(ID_ISAR2, MEMHINT, 4, 4) +FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4) +FIELD(ID_ISAR2, MULT, 12, 4) +FIELD(ID_ISAR2, MULTS, 16, 4) +FIELD(ID_ISAR2, MULTU, 20, 4) +FIELD(ID_ISAR2, PSR_AR, 24, 4) +FIELD(ID_ISAR2, REVERSAL, 28, 4) + +FIELD(ID_ISAR3, SATURATE, 0, 4) +FIELD(ID_ISAR3, SIMD, 4, 4) +FIELD(ID_ISAR3, SVC, 8, 4) +FIELD(ID_ISAR3, SYNCHPRIM, 12, 4) +FIELD(ID_ISAR3, TABBRANCH, 16, 4) +FIELD(ID_ISAR3, T32COPY, 20, 4) +FIELD(ID_ISAR3, TRUENOP, 24, 4) +FIELD(ID_ISAR3, T32EE, 28, 4) + +FIELD(ID_ISAR4, UNPRIV, 0, 4) +FIELD(ID_ISAR4, WITHSHIFTS, 4, 4) +FIELD(ID_ISAR4, WRITEBACK, 8, 4) +FIELD(ID_ISAR4, SMC, 12, 4) +FIELD(ID_ISAR4, BARRIER, 16, 4) +FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4) +FIELD(ID_ISAR4, PSR_M, 24, 4) +FIELD(ID_ISAR4, SWP_FRAC, 28, 4) + +FIELD(ID_ISAR5, SEVL, 0, 4) +FIELD(ID_ISAR5, AES, 4, 4) +FIELD(ID_ISAR5, SHA1, 8, 4) +FIELD(ID_ISAR5, SHA2, 12, 4) +FIELD(ID_ISAR5, CRC32, 16, 4) +FIELD(ID_ISAR5, RDM, 24, 4) +FIELD(ID_ISAR5, VCMA, 28, 4) + +FIELD(ID_ISAR6, JSCVT, 0, 4) +FIELD(ID_ISAR6, DP, 4, 4) +FIELD(ID_ISAR6, FHM, 8, 4) +FIELD(ID_ISAR6, SB, 12, 4) +FIELD(ID_ISAR6, SPECRES, 16, 4) + +FIELD(ID_AA64ISAR0, AES, 4, 4) +FIELD(ID_AA64ISAR0, SHA1, 8, 4) +FIELD(ID_AA64ISAR0, SHA2, 12, 4) +FIELD(ID_AA64ISAR0, CRC32, 16, 4) +FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) +FIELD(ID_AA64ISAR0, RDM, 28, 4) +FIELD(ID_AA64ISAR0, SHA3, 32, 4) +FIELD(ID_AA64ISAR0, SM3, 36, 4) +FIELD(ID_AA64ISAR0, SM4, 40, 4) +FIELD(ID_AA64ISAR0, DP, 44, 4) +FIELD(ID_AA64ISAR0, FHM, 48, 4) +FIELD(ID_AA64ISAR0, TS, 52, 4) +FIELD(ID_AA64ISAR0, TLB, 56, 4) +FIELD(ID_AA64ISAR0, RNDR, 60, 4) + +FIELD(ID_AA64ISAR1, DPB, 0, 4) +FIELD(ID_AA64ISAR1, APA, 4, 4) +FIELD(ID_AA64ISAR1, API, 8, 4) +FIELD(ID_AA64ISAR1, JSCVT, 12, 4) +FIELD(ID_AA64ISAR1, FCMA, 16, 4) +FIELD(ID_AA64ISAR1, LRCPC, 20, 4) +FIELD(ID_AA64ISAR1, GPA, 24, 4) +FIELD(ID_AA64ISAR1, GPI, 28, 4) +FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) +FIELD(ID_AA64ISAR1, SB, 36, 4) +FIELD(ID_AA64ISAR1, SPECRES, 40, 4) + +FIELD(ID_AA64PFR0, EL0, 0, 4) +FIELD(ID_AA64PFR0, EL1, 4, 4) +FIELD(ID_AA64PFR0, EL2, 8, 4) +FIELD(ID_AA64PFR0, EL3, 12, 4) +FIELD(ID_AA64PFR0, FP, 16, 4) +FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) +FIELD(ID_AA64PFR0, GIC, 24, 4) +FIELD(ID_AA64PFR0, RAS, 28, 4) +FIELD(ID_AA64PFR0, SVE, 32, 4) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -1447,7 +1573,6 @@ enum arm_features { ARM_FEATURE_VFP3, ARM_FEATURE_VFP_FP16, ARM_FEATURE_NEON, - ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */ ARM_FEATURE_M, /* Microcontroller profile. */ ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. 
*/ ARM_FEATURE_THUMB2EE, @@ -1457,7 +1582,6 @@ enum arm_features { ARM_FEATURE_V5, ARM_FEATURE_STRONGARM, ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */ - ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */ ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */ ARM_FEATURE_GENERIC_TIMER, ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */ @@ -1470,30 +1594,15 @@ enum arm_features { ARM_FEATURE_LPAE, /* has Large Physical Address Extension */ ARM_FEATURE_V8, ARM_FEATURE_AARCH64, /* supports 64 bit mode */ - ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */ ARM_FEATURE_CBAR, /* has cp15 CBAR */ ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ ARM_FEATURE_EL2, /* has EL2 Virtualization support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ - ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */ ARM_FEATURE_PMU, /* has PMU support */ ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ - ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */ - ARM_FEATURE_SVE, /* has Scalable Vector Extension */ - ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */ - ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */ - ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */ - ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ - ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ }; @@ -2842,6 +2951,9 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) /* For M profile only, Handler (ie not Thread) mode */ #define ARM_TBFLAG_HANDLER_SHIFT 21 #define ARM_TBFLAG_HANDLER_MASK (1 << ARM_TBFLAG_HANDLER_SHIFT) +/* For M profile only, whether we should generate stack-limit checks */ +#define ARM_TBFLAG_STACKCHECK_SHIFT 22 +#define ARM_TBFLAG_STACKCHECK_MASK (1 << ARM_TBFLAG_STACKCHECK_SHIFT) /* Bit usage when in AArch64 state */ #define ARM_TBFLAG_TBI0_SHIFT 0 /* TBI0 for EL0/1 or TBI for EL2/3 */ @@ -2884,6 +2996,8 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT) #define ARM_TBFLAG_HANDLER(F) \ (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT) +#define ARM_TBFLAG_STACKCHECK(F) \ + (((F) & ARM_TBFLAG_STACKCHECK_MASK) >> ARM_TBFLAG_STACKCHECK_SHIFT) #define ARM_TBFLAG_TBI0(F) \ (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT) #define ARM_TBFLAG_TBI1(F) \ @@ -3040,4 +3154,157 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* + * 32-bit feature tests via id registers. 
+ */ +static inline bool isar_feature_thumb_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; +} + +static inline bool isar_feature_arm_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; +} + +static inline bool isar_feature_jazelle(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; +} + +static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; +} + +static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; +} + +static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; +} + +static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; +} + +static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; +} + +static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; +} + +static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; +} + +static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; +} + +static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) +{ + /* + * This is a placeholder for use by VCMA until the rest of + * the ARMv8.2-FP16 extension is implemented for aa32 mode. + * At which point we can properly set and check MVFR1.FPHP. + */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +/* + * 64-bit feature tests via id registers. 
+ */ +static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; +} + +static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; +} + +static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; +} + +static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; +} + +static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; +} + +static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; +} + +static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; +} + +static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; +} + +static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; +} + +static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; +} + +static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; +} + +static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; +} + +static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; +} + +static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) +{ + /* We always set the AdvSIMD and FP fields identically wrt FP16. */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; +} + +/* + * Forward to the above feature tests given an ARMCPU pointer. 
+ */ +#define cpu_isar_feature(name, cpu) \ + ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) + #endif diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 800bff780e..873f059bf2 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -51,7 +51,7 @@ static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) } #endif -static const ARMCPRegInfo cortex_a57_a53_cp_reginfo[] = { +static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = { #ifndef CONFIG_USER_ONLY { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2, @@ -109,11 +109,6 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -121,9 +116,9 @@ static void aarch64_a57_initfn(Object *obj) cpu->midr = 0x411fd070; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -134,18 +129,18 @@ static void aarch64_a57_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; cpu->pmceid0 = 0x00000000; cpu->pmceid1 = 0x00000000; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001124; cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -156,7 +151,7 @@ static void aarch64_a57_initfn(Object *obj) cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo); + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); } static void aarch64_a53_initfn(Object *obj) @@ -170,11 +165,6 @@ static void aarch64_a53_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -182,9 +172,9 @@ static void aarch64_a53_initfn(Object *obj) cpu->midr = 0x410fd034; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 
0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -195,16 +185,16 @@ static void aarch64_a53_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -215,7 +205,61 @@ static void aarch64_a53_initfn(Object *obj) cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo); + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); +} + +static void aarch64_a72_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a72"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->midr = 0x410fd083; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034080; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; + cpu->id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10201105; + cpu->id_mmfr1 = 0x40000000; + cpu->id_mmfr2 = 0x01260000; + cpu->id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; + cpu->id_aa64dfr0 = 0x10305106; + cpu->pmceid0 = 0x00000000; + cpu->pmceid1 = 0x00000000; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->id_aa64mmfr0 = 0x00001124; + cpu->dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ + cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); } static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, @@ -253,24 +297,55 @@ static void aarch64_max_initfn(Object *obj) if (kvm_enabled()) { kvm_arm_set_cpu_features_from_host(cpu); } else { + uint64_t t; + uint32_t u; aarch64_a57_initfn(obj); -#ifdef CONFIG_USER_ONLY - /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, 
- * and in some cases they're only available in AArch64 and not AArch32, - * whereas the architecture requires them to be present in both if - * present in either. + + t = cpu->isar.id_aa64isar0; + t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ + t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); + t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); + cpu->isar.id_aa64isar0 = t; + + t = cpu->isar.id_aa64isar1; + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); + cpu->isar.id_aa64isar1 = t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); + t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); + cpu->isar.id_aa64pfr0 = t; + + /* Replicate the same data to the 32-bit id registers. */ + u = cpu->isar.id_isar5; + u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ + u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); + u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); + u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); + u = FIELD_DP32(u, ID_ISAR5, RDM, 1); + u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = u; + + u = cpu->isar.id_isar6; + u = FIELD_DP32(u, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = u; + + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, + * so do not set MVFR1.FPHP. Strictly speaking this is not legal, + * but it is also not legal to enable SVE without support for FP16, + * and enabling SVE in system mode is more useful in the short term. */ - set_feature(&cpu->env, ARM_FEATURE_V8_SHA512); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM4); - set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FP16); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); - set_feature(&cpu->env, ARM_FEATURE_SVE); + +#ifdef CONFIG_USER_ONLY /* For usermode -cpu max we can use a larger and more efficient DCZ * blocksize since we don't have to follow what the hardware does. */ @@ -293,6 +368,7 @@ typedef struct ARMCPUInfo { static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "max", .initfn = aarch64_max_initfn }, { .name = NULL } }; @@ -410,45 +486,3 @@ static void aarch64_cpu_register_types(void) } type_init(aarch64_cpu_register_types) - -/* The manual says that when SVE is enabled and VQ is widened the - * implementation is allowed to zero the previously inaccessible - * portion of the registers. The corollary to that is that when - * SVE is enabled and VQ is narrowed we are also allowed to zero - * the now inaccessible portion of the registers. - * - * The intent of this is that no predicate bit beyond VQ is ever set. - * Which means that some operations on predicate registers themselves - * may operate on full uint64_t or even unrolled across the maximum - * uint64_t[4]. Performing 4 bits of host arithmetic unconditionally - * may well be cheaper than conditionals to restrict the operation - * to the relevant portion of a uint16_t[16]. 
- * - * TODO: Need to call this for changes to the real system registers - * and EL state changes. - */ -void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) -{ - int i, j; - uint64_t pmask; - - assert(vq >= 1 && vq <= ARM_MAX_VQ); - assert(vq <= arm_env_get_cpu(env)->sve_max_vq); - - /* Zap the high bits of the zregs. */ - for (i = 0; i < 32; i++) { - memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq)); - } - - /* Zap the high bits of the pregs and ffr. */ - pmask = 0; - if (vq & 3) { - pmask = ~(-1ULL << (16 * (vq & 3))); - } - for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) { - for (i = 0; i < 17; ++i) { - env->vfp.pregs[i].p[j] &= pmask; - } - pmask = 0; - } -} diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 7f6ad3000b..61799d20e1 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -30,6 +30,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" #include "fpu/softfloat.h" #include <zlib.h> /* For crc32 */ @@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Returns 0 on success; 1 otherwise. */ -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) { - Int128 oldv, cmpv, newv; + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; bool success; - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; - #ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - helper_retaddr = 0; + /* ??? Enforce alignment. 
*/ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } + helper_retaddr = 0; #else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } -#endif + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); } +#endif return !success; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); -} - uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); -} - -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) -{ Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); bool success; + int mem_idx; + TCGMemOpIdx oi; - /* high and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); + assert(HAVE_CMPXCHG128); - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. 
*/ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - helper_retaddr = 0; -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - } + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); return !success; } uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; + bool success; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + + return !success; } uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) + uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); + Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + bool success; + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + newv = int128_make128(new_hi, new_lo); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + + success = int128_eq(oldv, cmpv); + return !success; } /* Writes back the old data into Rs. 
*/ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs] = int128_getlo(oldv); env->xregs[rs + 1] = int128_gethi(oldv); -#endif } void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_hi, uint64_t new_lo) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs + 1] = int128_getlo(oldv); env->xregs[rs] = int128_gethi(oldv); -#endif } /* diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 023952a9a4..9e79182ab4 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1128,20 +1128,35 @@ DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, 
void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1150,13 +1165,21 @@ DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1166,17 +1189,28 @@ DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
-DEF_HELPER_FLAGS_4(sve_ldff1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1186,218 +1220,357 @@ DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, 
tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
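The helper-sve.h hunks here split each contiguous SVE load/store helper into explicit _le_r/_be_r variants (dropping the stray trailing 'u' from names such as sve_ldssu along the way), so the data endianness is baked into the helper name rather than decided inside a single helper. Presumably the translator then picks one function of each pair at translate time; the sketch below shows that kind of two-entry dispatch with invented stub names, not the real translate-sve.c tables.

#include <stdbool.h>
#include <stdio.h>

typedef void sve_ld1_fn(void *env, void *vg, unsigned long addr, unsigned desc);

static void ld1hh_le_stub(void *env, void *vg, unsigned long addr, unsigned desc)
{
    (void)env; (void)vg; (void)addr; (void)desc;
    puts("little-endian halfword contiguous load");
}

static void ld1hh_be_stub(void *env, void *vg, unsigned long addr, unsigned desc)
{
    (void)env; (void)vg; (void)addr; (void)desc;
    puts("big-endian halfword contiguous load");
}

/* One entry per data endianness, mirroring the _le_r/_be_r helper pairs. */
static sve_ld1_fn *const ld1hh_fns[2] = { ld1hh_le_stub, ld1hh_be_stub };

int main(void)
{
    bool be_data = true;               /* would come from the translation flags */
    ld1hh_fns[be_data](NULL, NULL, 0, 0);
    return 0;
}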
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1sd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, 
TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, 
void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhsu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffssu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhsu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffssu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG, void, 
env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) 
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index 64b1564594..0ea95b0815 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -56,6 +56,8 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address, V8M_SAttributes *sattrs); #endif +static void switch_mode(CPUARMState *env, int mode); + static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) { int nregs; @@ -552,12 +554,61 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +/* IS variants of TLB operations must affect all cores */ +static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +/* + * Non-IS variants of TLB operations are upgraded to + * IS versions if we are at NS EL1 and HCR_EL2.FB is set to + * force broadcast of these operations. 
+ */ +static bool tlb_force_broadcast(CPUARMState *env) +{ + return (env->cp15.hcr_el2 & HCR_FB) && + arm_current_el(env) == 1 && arm_is_secure_below_el3(env); +} + static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate all (TLBIALL) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbiall_is_write(env, NULL, value); + return; + } + tlb_flush(CPU(cpu)); } @@ -567,6 +618,11 @@ static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbimva_is_write(env, NULL, value); + return; + } + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } @@ -576,6 +632,11 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate by ASID (TLBIASID) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbiasid_is_write(env, NULL, value); + return; + } + tlb_flush(CPU(cpu)); } @@ -585,40 +646,12 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); -} - -/* IS variants of TLB operations must affect all cores */ -static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); -} - -static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); + if (tlb_force_broadcast(env)) { + tlbimvaa_is_write(env, NULL, value); + return; + } - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1179,6 +1212,7 @@ static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + value &= pmu_counter_mask(env); env->cp15.c9_pmovsr &= ~value; } @@ -1295,12 +1329,26 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) CPUState *cs = ENV_GET_CPU(env); uint64_t ret = 0; - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - ret |= CPSR_I; + if (arm_hcr_el2_imo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= CPSR_I; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + ret |= CPSR_I; + } } - if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { - ret |= CPSR_F; + + if (arm_hcr_el2_fmo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= CPSR_F; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + ret |= CPSR_F; + } } + /* External aborts are not possible in QEMU so A bit is always clear */ return ret; } @@ -1423,12 +1471,14 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .writefn = pmintenset_write, .raw_writefn = raw_write, .resetvalue = 0x0 }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, .type = 
ARM_CP_ALIAS, + .access = PL1_RW, .accessfn = access_tpm, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write, }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS, + .access = PL1_RW, .accessfn = access_tpm, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, @@ -2267,13 +2317,15 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * * The Non-secure TTBCR.EAE bit is set to 1 * * The implementation includes EL2, and the value of HCR.VM is 1 * + * (Note that HCR.DC makes HCR.VM behave as if it is 1.) + * * ATS1Hx always uses the 64bit format (not supported yet). */ format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); if (arm_feature(env, ARM_FEATURE_EL2)) { if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { - format64 |= env->cp15.hcr_el2 & HCR_VM; + format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); } else { format64 |= arm_current_el(env) == 2; } @@ -2706,12 +2758,10 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* 64 bit accesses to the TTBRs can change the ASID and so we - * must flush the TLB. - */ - if (cpreg_field_is_64bit(ri)) { + /* If the ASID changes (with a 64-bit write), we must flush the TLB. */ + if (cpreg_field_is_64bit(ri) && + extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu)); } raw_write(env, ri, value); @@ -3080,22 +3130,6 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env, * Page D4-1736 (DDI0487A.b) */ -static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3113,6 +3147,27 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vmalle1_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3202,29 +3257,6 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3); } -static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by VA, EL1&0 (AArch64 version). - * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. 
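The helper.c changes in this stretch move the *_is_write (Inner Shareable, all-CPU) TLB functions ahead of their local counterparts so that tlbiall_write(), tlbimva_write(), tlbiasid_write() and tlbimvaa_write() can delegate to them whenever tlb_force_broadcast() reports that HCR_EL2.FB is in force at NS EL1. A toy model of that upgrade pattern, with invented function names rather than QEMU APIs, looks like this:

#include <stdbool.h>
#include <stdio.h>

static void flush_local(void)     { puts("flush this CPU's TLB"); }
static void flush_broadcast(void) { puts("flush every CPU's TLB"); }

static void tlbiall_like_write(bool hcr_fb_set, bool at_ns_el1)
{
    if (hcr_fb_set && at_ns_el1) {   /* stand-in for tlb_force_broadcast() */
        flush_broadcast();
        return;
    }
    flush_local();
}

int main(void)
{
    tlbiall_like_write(true, true);   /* upgraded to the broadcast variant */
    tlbiall_like_write(false, true);  /* stays a local flush */
    return 0;
}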
- */ - ARMCPU *cpu = arm_env_get_cpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3272,6 +3304,34 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, EL1&0 (AArch64 version). + * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vae1is_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3869,6 +3929,7 @@ static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = { static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = ENV_GET_CPU(env); uint64_t valid_mask = HCR_MASK; if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -3887,6 +3948,28 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear RES0 bits. */ value &= valid_mask; + /* + * VI and VF are kept in cs->interrupt_request. Modifying that + * requires that we have the iothread lock, which is done by + * marking the reginfo structs as ARM_CP_IO. + * Note that if a write to HCR pends a VIRQ or VFIQ it is never + * possible for it to be taken immediately, because VIRQ and + * VFIQ are masked unless running at EL0 or EL1, and HCR + * can only be written at EL2. 
+ */ + g_assert(qemu_mutex_iothread_locked()); + if (value & HCR_VI) { + cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + } + if (value & HCR_VF) { + cs->interrupt_request |= CPU_INTERRUPT_VFIQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ; + } + value &= ~(HCR_VI | HCR_VF); + /* These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups @@ -3914,16 +3997,32 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, hcr_write(env, NULL, value); } +static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* The VI and VF bits live in cs->interrupt_request */ + uint64_t ret = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF); + CPUState *cs = ENV_GET_CPU(env); + + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= HCR_VI; + } + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= HCR_VF; + } + return ret; +} + static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .readfn = hcr_read }, { .name = "HCR", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_writelow }, + .writefn = hcr_writelow, .readfn = hcr_read }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, @@ -4160,7 +4259,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { static const ARMCPRegInfo el2_v8_cp_reginfo[] = { { .name = "HCR2", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2), @@ -4211,7 +4310,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.mvbar) }, { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .access = PL3_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) }, { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, @@ -4400,78 +4499,105 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { REGINFO_SENTINEL }; -/* Return the exception level to which SVE-disabled exceptions should - * be taken, or 0 if SVE is enabled. +/* Return the exception level to which exceptions should be taken + * via SVEAccessTrap. If an exception should be routed through + * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should + * take care of raising that exception. + * C.f. the ARM pseudocode function CheckSVEEnabled. 
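The hcr_write()/hcr_read() pair above treats HCR_EL2.VI and HCR_EL2.VF specially: on write they are folded into cs->interrupt_request as CPU_INTERRUPT_VIRQ/VFIQ (hence the new ARM_CP_IO markings and the iothread-lock assertion) and stripped from the stored register value; on read they are reconstructed from the pending-interrupt state. A self-contained model of that round trip, with local stand-in constants rather than the real HCR_* and CPU_INTERRUPT_* definitions, is sketched below.

#include <stdint.h>
#include <stdio.h>

#define HCR_VI_BIT (1u << 7)   /* stand-in bit positions for this sketch */
#define HCR_VF_BIT (1u << 6)
#define IRQ_VIRQ   (1u << 0)
#define IRQ_VFIQ   (1u << 1)

static uint32_t hcr_image;     /* models cp15.hcr_el2 without VI/VF */
static uint32_t irq_pending;   /* models cs->interrupt_request */

static void model_hcr_write(uint32_t value)
{
    irq_pending &= ~(IRQ_VIRQ | IRQ_VFIQ);
    if (value & HCR_VI_BIT) { irq_pending |= IRQ_VIRQ; }
    if (value & HCR_VF_BIT) { irq_pending |= IRQ_VFIQ; }
    hcr_image = value & ~(HCR_VI_BIT | HCR_VF_BIT);
}

static uint32_t model_hcr_read(void)
{
    uint32_t ret = hcr_image;
    if (irq_pending & IRQ_VIRQ) { ret |= HCR_VI_BIT; }
    if (irq_pending & IRQ_VFIQ) { ret |= HCR_VF_BIT; }
    return ret;
}

int main(void)
{
    model_hcr_write(HCR_VI_BIT | 0x1);
    printf("read back 0x%x, pending 0x%x\n", model_hcr_read(), irq_pending);
    return 0;
}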
*/ -static int sve_exception_el(CPUARMState *env) +int sve_exception_el(CPUARMState *env, int el) { #ifndef CONFIG_USER_ONLY - unsigned current_el = arm_current_el(env); + if (el <= 1) { + bool disabled = false; - /* The CPACR.ZEN controls traps to EL1: - * 0, 2 : trap EL0 and EL1 accesses - * 1 : trap only EL0 accesses - * 3 : trap no accesses - */ - switch (extract32(env->cp15.cpacr_el1, 16, 2)) { - default: - if (current_el <= 1) { - /* Trap to PL1, which might be EL1 or EL3 */ - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; + /* The CPACR.ZEN controls traps to EL1: + * 0, 2 : trap EL0 and EL1 accesses + * 1 : trap only EL0 accesses + * 3 : trap no accesses + */ + if (!extract32(env->cp15.cpacr_el1, 16, 1)) { + disabled = true; + } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) { + disabled = el == 0; } - break; - case 1: - if (current_el == 0) { - return 1; + if (disabled) { + /* route_to_el2 */ + return (arm_feature(env, ARM_FEATURE_EL2) + && !arm_is_secure(env) + && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1); } - break; - case 3: - break; - } - /* Similarly for CPACR.FPEN, after having checked ZEN. */ - switch (extract32(env->cp15.cpacr_el1, 20, 2)) { - default: - if (current_el <= 1) { - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; + /* Check CPACR.FPEN. */ + if (!extract32(env->cp15.cpacr_el1, 20, 1)) { + disabled = true; + } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) { + disabled = el == 0; } - break; - case 1: - if (current_el == 0) { - return 1; + if (disabled) { + return 0; } - break; - case 3: - break; } - /* CPTR_EL2. Check both TZ and TFP. */ - if (current_el <= 2 - && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ)) - && !arm_is_secure_below_el3(env)) { - return 2; + /* CPTR_EL2. Since TZ and TFP are positive, + * they will be zero when EL2 is not present. + */ + if (el <= 2 && !arm_is_secure_below_el3(env)) { + if (env->cp15.cptr_el[2] & CPTR_TZ) { + return 2; + } + if (env->cp15.cptr_el[2] & CPTR_TFP) { + return 0; + } } - /* CPTR_EL3. Check both EZ and TFP. */ - if (!(env->cp15.cptr_el[3] & CPTR_EZ) - || (env->cp15.cptr_el[3] & CPTR_TFP)) { + /* CPTR_EL3. Since EZ is negative we must check for EL3. */ + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.cptr_el[3] & CPTR_EZ)) { return 3; } #endif return 0; } +/* + * Given that SVE is enabled, return the vector length for EL. + */ +uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint32_t zcr_len = cpu->sve_max_vq - 1; + + if (el <= 1) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); + } + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); + } + if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); + } + return zcr_len; +} + static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + int cur_el = arm_current_el(env); + int old_len = sve_zcr_len_for_el(env, cur_el); + int new_len; + /* Bits other than [3:0] are RAZ/WI. */ raw_write(env, ri, value & 0xf); + + /* + * Because we arrived here, we know both FP and SVE are enabled; + * otherwise we would have trapped access to the ZCR_ELn register. 
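sve_zcr_len_for_el() above computes the effective ZCR LEN value as the minimum of the LEN fields at the current EL and every higher implemented EL, capped by the CPU's sve_max_vq; zcr_write() then uses it to decide whether the vector length shrank and inaccessible state must be zeroed. A worked example of that clamping, written as a standalone function with invented parameter names, follows.

#include <stdint.h>
#include <stdio.h>

static uint32_t effective_len(uint32_t max_vq, uint32_t zcr1, uint32_t zcr2,
                              uint32_t zcr3, int el, int have_el2, int have_el3)
{
    uint32_t len = max_vq - 1;

    if (el <= 1)            len = len < (zcr1 & 0xf) ? len : (zcr1 & 0xf);
    if (el < 2 && have_el2) len = len < (zcr2 & 0xf) ? len : (zcr2 & 0xf);
    if (el < 3 && have_el3) len = len < (zcr3 & 0xf) ? len : (zcr3 & 0xf);
    return len;
}

int main(void)
{
    /* sve_max_vq = 4 (512 bit), ZCR_EL1.LEN = 3, ZCR_EL2.LEN = 1: EL1 code
     * ends up with LEN = 1, i.e. (1 + 1) * 128 = 256-bit vectors. */
    uint32_t len = effective_len(4, 3, 1, 0xf, 1, 1, 1);
    printf("LEN = %u -> %u-bit vectors\n", len, (len + 1) * 128);
    return 0;
}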
+ */ + new_len = sve_zcr_len_for_el(env, cur_el); + if (new_len < old_len) { + aarch64_sve_narrow_vq(env, new_len + 1); + } } static const ARMCPRegInfo zcr_el1_reginfo = { @@ -4843,7 +4969,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); - uint64_t pfr0 = cpu->id_aa64pfr0; + uint64_t pfr0 = cpu->isar.id_aa64pfr0; if (env->gicv3state) { pfr0 |= 1 << 24; @@ -4910,27 +5036,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar0 }, + .resetvalue = cpu->isar.id_isar0 }, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar1 }, + .resetvalue = cpu->isar.id_isar1 }, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar2 }, + .resetvalue = cpu->isar.id_isar2 }, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar3 }, + .resetvalue = cpu->isar.id_isar3 }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar4 }, + .resetvalue = cpu->isar.id_isar4 }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar5 }, + .resetvalue = cpu->isar.id_isar5 }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -4938,7 +5064,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar6 }, + .resetvalue = cpu->isar.id_isar6 }, REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v6_idregs); @@ -5009,7 +5135,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64pfr1}, + .resetvalue = cpu->isar.id_aa64pfr1}, { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5018,9 +5144,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "ID_AA64PFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, + /* At present, only SVEver == 0 is defined anyway. 
*/ .resetvalue = 0 }, { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, @@ -5069,11 +5196,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar0 }, + .resetvalue = cpu->isar.id_aa64isar0 }, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar1 }, + .resetvalue = cpu->isar.id_aa64isar1 }, { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5133,15 +5260,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr0 }, + .resetvalue = cpu->isar.mvfr0 }, { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr1 }, + .resetvalue = cpu->isar.mvfr1 }, { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr2 }, + .resetvalue = cpu->isar.mvfr2 }, { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -5587,7 +5714,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &sctlr); } - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); if (arm_feature(env, ARM_FEATURE_EL2)) { define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); @@ -6177,7 +6304,17 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, mask |= CPSR_IL; val |= CPSR_IL; } + qemu_log_mask(LOG_GUEST_ERROR, + "Illegal AArch32 mode switch attempt from %s to %s\n", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val)); } else { + qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n", + write_type == CPSRWriteExceptionReturn ? 
+ "Exception return from AArch32" : + "AArch32 mode switch from", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val), env->regs[15]); switch_mode(env, val & CPSR_M); } } @@ -6275,7 +6412,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) return 0; } -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { ARMCPU *cpu = arm_env_get_cpu(env); @@ -6297,7 +6434,7 @@ void aarch64_sync_64_to_32(CPUARMState *env) #else -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { int old_mode; int i; @@ -6441,7 +6578,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, target_ulong page_size; hwaddr physaddr; int prot; - ARMMMUFaultInfo fi; + ARMMMUFaultInfo fi = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; @@ -6503,7 +6640,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, target_ulong page_size; hwaddr physaddr; int prot; - ARMMMUFaultInfo fi; + ARMMMUFaultInfo fi = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; @@ -6554,18 +6691,6 @@ pend_fault: return false; } -/* Return true if we're using the process stack pointer (not the MSP) */ -static bool v7m_using_psp(CPUARMState *env) -{ - /* Handler mode always uses the main stack; for thread mode - * the CONTROL.SPSEL bit determines the answer. - * Note that in v7M it is not possible to be in Handler mode with - * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both. - */ - return !arm_v7m_is_handler_mode(env) && - env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK; -} - /* Write to v7M CONTROL.SPSEL bit for the specified security bank. * This may change the current stack pointer between Main and Process * stack pointers if it is done for the CONTROL register for the current @@ -6722,6 +6847,10 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) "BLXNS with misaligned SP is UNPREDICTABLE\n"); } + if (sp < v7m_sp_limit(env)) { + raise_exception(env, EXCP_STKOF, 0, 1); + } + saved_psr = env->v7m.exception; if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) { saved_psr |= XPSR_SFPA; @@ -6851,6 +6980,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, uint32_t frameptr; ARMMMUIdx mmu_idx; bool stacked_ok; + uint32_t limit; + bool want_psp; if (dotailchain) { bool mode = lr & R_V7M_EXCRET_MODE_MASK; @@ -6860,12 +6991,34 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv); frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode, lr & R_V7M_EXCRET_SPSEL_MASK); + want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK); + if (want_psp) { + limit = env->v7m.psplim[M_REG_S]; + } else { + limit = env->v7m.msplim[M_REG_S]; + } } else { mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); frame_sp_p = &env->regs[13]; + limit = v7m_sp_limit(env); } frameptr = *frame_sp_p - 0x28; + if (frameptr < limit) { + /* + * Stack limit failure: set SP to the limit value, and generate + * STKOF UsageFault. Stack pushes below the limit must not be + * performed. It is IMPDEF whether pushes above the limit are + * performed; we choose not to. 
+ */ + qemu_log_mask(CPU_LOG_INT, + "...STKOF during callee-saves register stacking\n"); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, + env->v7m.secure); + *frame_sp_p = limit; + return true; + } /* Write as much of the stack frame as we can. A write failure may * cause us to pend a derived exception. @@ -6889,10 +7042,7 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx, ignore_faults); - /* Update SP regardless of whether any of the stack accesses failed. - * When we implement v8M stack limit checking then this attempt to - * update SP might also fail and result in a derived exception. - */ + /* Update SP regardless of whether any of the stack accesses failed. */ *frame_sp_p = frameptr; return !stacked_ok; @@ -6938,7 +7088,7 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain, * not already saved. */ if (lr & R_V7M_EXCRET_DCRS_MASK && - !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) { + !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) { push_failed = v7m_push_callee_stack(cpu, lr, dotailchain, ignore_stackfaults); } @@ -7040,6 +7190,26 @@ static bool v7m_push_stack(ARMCPU *cpu) frameptr -= 0x20; + if (arm_feature(env, ARM_FEATURE_V8)) { + uint32_t limit = v7m_sp_limit(env); + + if (frameptr < limit) { + /* + * Stack limit failure: set SP to the limit value, and generate + * STKOF UsageFault. Stack pushes below the limit must not be + * performed. It is IMPDEF whether pushes above the limit are + * performed; we choose not to. + */ + qemu_log_mask(CPU_LOG_INT, + "...STKOF during stacking\n"); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, + env->v7m.secure); + env->regs[13] = limit; + return true; + } + } + /* Write as much of the stack frame as we can. If we fail a stack * write this will result in a derived exception being pended * (which may be taken in preference to the one we started with @@ -7055,10 +7225,7 @@ static bool v7m_push_stack(ARMCPU *cpu) v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) && v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false); - /* Update SP regardless of whether any of the stack accesses failed. - * When we implement v8M stack limit checking then this attempt to - * update SP might also fail and result in a derived exception. - */ + /* Update SP regardless of whether any of the stack accesses failed. 
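Both stacking paths above apply the same v8M stack-limit rule: compute the would-be frame pointer, and if it falls below the active limit, saturate SP at the limit, skip the pushes, and pend a STKOF UsageFault. A minimal standalone sketch of that rule (push_frame and its arguments are invented for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool push_frame(uint32_t *sp, uint32_t limit, uint32_t frame_size)
{
    uint32_t frameptr = *sp - frame_size;

    if (frameptr < limit) {
        *sp = limit;                 /* SP saturates at the limit value */
        puts("pend STKOF UsageFault; no registers stacked");
        return false;
    }
    *sp = frameptr;                  /* normal case: the frame is written here */
    return true;
}

int main(void)
{
    uint32_t sp = 0x20001000, limit = 0x20000ff0;
    push_frame(&sp, limit, 0x28);    /* 0x28-byte callee-save frame: faults */
    printf("sp now 0x%x\n", sp);
    return 0;
}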
*/ env->regs[13] = frameptr; return !stacked_ok; @@ -7304,7 +7471,6 @@ static void do_v7m_exception_exit(ARMCPU *cpu) pop_ok = pop_ok && v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && - v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) && v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) && v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) && @@ -7512,6 +7678,7 @@ static void arm_log_exception(int idx) [EXCP_SEMIHOST] = "Semihosting call", [EXCP_NOCP] = "v7M NOCP UsageFault", [EXCP_INVSTATE] = "v7M INVSTATE UsageFault", + [EXCP_STKOF] = "v8M STKOF UsageFault", }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { @@ -7667,6 +7834,10 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK; break; + case EXCP_STKOF: + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + break; case EXCP_SWI: /* The PC already points to the next instruction. */ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure); @@ -8129,6 +8300,19 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) } if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) { + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* + * QEMU syndrome values are v8-style. v7 has the IL bit + * UNK/SBZP for "field not valid" cases, where v8 uses RES1. + * If this is a v7 CPU, squash the IL bit in those cases. + */ + if (cs->exception_index == EXCP_PREFETCH_ABORT || + (cs->exception_index == EXCP_DATA_ABORT && + !(env->exception.syndrome & ARM_EL_ISV)) || + syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) { + env->exception.syndrome &= ~ARM_EL_IL; + } + } env->cp15.esr_el[2] = env->exception.syndrome; } @@ -8163,7 +8347,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) uint32_t moe; /* If this is a debug exception we must update the DBGDSCR.MOE bits */ - switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) { + switch (syn_get_ec(env->exception.syndrome)) { case EC_BREAKPOINT: case EC_BREAKPOINT_SAME_EL: moe = 1; @@ -8310,8 +8494,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) unsigned int new_el = env->exception.target_el; target_ulong addr = env->cp15.vbar_el[new_el]; unsigned int new_mode = aarch64_pstate_mode(new_el, true); + unsigned int cur_el = arm_current_el(env); + + /* + * Note that new_el can never be 0. If cur_el is 0, then + * el0_a64 is is_a64(), else el0_a64 is ignored. + */ + aarch64_sve_change_el(env, cur_el, new_el, is_a64(env)); - if (arm_current_el(env) < new_el) { + if (cur_el < new_el) { /* Entry vector offset depends on whether the implemented EL * immediately lower than the target level is using AArch32 or AArch64 */ @@ -8353,6 +8544,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: + if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + /* + * QEMU internal FP/SIMD syndromes from AArch32 include the + * TA and coproc fields which are only exposed if the exception + * is taken to AArch32 Hyp mode. Mask them out to get a valid + * AArch64 format syndrome. 
+ */ + env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + } env->cp15.esr_el[new_el] = env->exception.syndrome; break; case EXCP_IRQ: @@ -8496,7 +8696,7 @@ void arm_cpu_do_interrupt(CPUState *cs) if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", - env->exception.syndrome >> ARM_EL_EC_SHIFT, + syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -8593,7 +8793,8 @@ static inline bool regime_translation_disabled(CPUARMState *env, } if (mmu_idx == ARMMMUIdx_S2NS) { - return (env->cp15.hcr_el2 & HCR_VM) == 0; + /* HCR.DC means HCR.VM behaves as 1 */ + return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0; } if (env->cp15.hcr_el2 & HCR_TGE) { @@ -8603,6 +8804,12 @@ static inline bool regime_translation_disabled(CPUARMState *env, } } + if ((env->cp15.hcr_el2 & HCR_DC) && + (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) { + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ + return true; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } @@ -8954,9 +9161,20 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, hwaddr s2pa; int s2prot; int ret; + ARMCacheAttrs cacheattrs = {}; + ARMCacheAttrs *pcacheattrs = NULL; + + if (env->cp15.hcr_el2 & HCR_PTW) { + /* + * PTW means we must fault if this S1 walk touches S2 Device + * memory; otherwise we don't care about the attributes and can + * save the S2 translation the effort of computing them. + */ + pcacheattrs = &cacheattrs; + } ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa, - &txattrs, &s2prot, &s2size, fi, NULL); + &txattrs, &s2prot, &s2size, fi, pcacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -8964,6 +9182,14 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } + if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { + /* Access was to Device memory: generate Permission fault */ + fi->type = ARMFault_Permission; + fi->s2addr = addr; + fi->stage2 = true; + fi->s1ptw = true; + return ~0; + } addr = s2pa; } return addr; @@ -10583,6 +10809,16 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address, /* Combine the S1 and S2 cache attributes, if needed */ if (!ret && cacheattrs != NULL) { + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + */ + cacheattrs->attrs = 0xff; + cacheattrs->shareability = 0; + } *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); } @@ -10929,11 +11165,23 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val) * currently in handler mode or not, using the NS CONTROL.SPSEL. */ bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK; + bool is_psp = !arm_v7m_is_handler_mode(env) && spsel; + uint32_t limit; if (!env->v7m.secure) { return; } - if (!arm_v7m_is_handler_mode(env) && spsel) { + + limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false]; + + if (val < limit) { + CPUState *cs = CPU(arm_env_get_cpu(env)); + + cpu_restore_state(cs, GETPC(), true); + raise_exception(env, EXCP_STKOF, 0, 1); + } + + if (is_psp) { env->v7m.other_ss_psp = val; } else { env->v7m.other_ss_msp = val; @@ -11528,7 +11776,7 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) uint32_t changed; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. 
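On the HCR.DC path above the stage-1 attributes are forced to 0xff with shareability 0, which in the MAIR-style encoding used for these attribute bytes reads as Normal, Non-shareable, Inner and Outer Write-Back Read-Allocate Write-Allocate, exactly what the comment states. The small decoder below is only a local illustration of that encoding; it is not a QEMU function and only handles the two values it mentions.

#include <stdint.h>
#include <stdio.h>

static const char *normal_nibble(uint8_t n)
{
    /* Only the encodings this sketch needs; see MAIR_ELx for the full table. */
    switch (n) {
    case 0xf: return "Write-Back, Read-Allocate, Write-Allocate";
    case 0x4: return "Non-cacheable";
    default:  return "(not decoded in this sketch)";
    }
}

int main(void)
{
    uint8_t attrs = 0xff;                     /* value forced when HCR.DC is set */
    printf("Outer: %s\n", normal_nibble(attrs >> 4));   /* high nibble = Outer */
    printf("Inner: %s\n", normal_nibble(attrs & 0xf));  /* low nibble  = Inner */
    return 0;
}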
*/ - if (!arm_feature(env, ARM_FEATURE_V8_FP16)) { + if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { val &= ~FPCR_FZ16; } @@ -12516,11 +12764,10 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Return the exception level to which FP-disabled exceptions should * be taken, or 0 if FP is enabled. */ -static inline int fp_exception_el(CPUARMState *env) +int fp_exception_el(CPUARMState *env, int cur_el) { #ifndef CONFIG_USER_ONLY int fpen; - int cur_el = arm_current_el(env); /* CPACR and the CPTR registers don't exist before v6, so FP is * always accessible @@ -12583,18 +12830,21 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *pflags) { ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); - int fp_el = fp_exception_el(env); + int current_el = arm_current_el(env); + int fp_el = fp_exception_el(env, current_el); uint32_t flags; if (is_a64(env)) { + ARMCPU *cpu = arm_env_get_cpu(env); + *pc = env->pc; flags = ARM_TBFLAG_AARCH64_STATE_MASK; /* Get control bits for tagged addresses */ flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); - if (arm_feature(env, ARM_FEATURE_SVE)) { - int sve_el = sve_exception_el(env); + if (cpu_isar_feature(aa64_sve, cpu)) { + int sve_el = sve_exception_el(env, current_el); uint32_t zcr_len; /* If SVE is disabled, but FP is enabled, @@ -12603,19 +12853,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (sve_el != 0 && fp_el == 0) { zcr_len = 0; } else { - int current_el = arm_current_el(env); - ARMCPU *cpu = arm_env_get_cpu(env); - - zcr_len = cpu->sve_max_vq - 1; - if (current_el <= 1) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); - } - if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); - } - if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); - } + zcr_len = sve_zcr_len_for_el(env, current_el); } flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT; flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT; @@ -12668,6 +12906,103 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, flags |= ARM_TBFLAG_HANDLER_MASK; } + /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is + * suppressing them because the requested execution priority is less than 0. + */ + if (arm_feature(env, ARM_FEATURE_V8) && + arm_feature(env, ARM_FEATURE_M) && + !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && + (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) { + flags |= ARM_TBFLAG_STACKCHECK_MASK; + } + *pflags = flags; *cs_base = 0; } + +#ifdef TARGET_AARCH64 +/* + * The manual says that when SVE is enabled and VQ is widened the + * implementation is allowed to zero the previously inaccessible + * portion of the registers. The corollary to that is that when + * SVE is enabled and VQ is narrowed we are also allowed to zero + * the now inaccessible portion of the registers. + * + * The intent of this is that no predicate bit beyond VQ is ever set. + * Which means that some operations on predicate registers themselves + * may operate on full uint64_t or even unrolled across the maximum + * uint64_t[4]. Performing 4 bits of host arithmetic unconditionally + * may well be cheaper than conditionals to restrict the operation + * to the relevant portion of a uint16_t[16]. 
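+ *
+ * As a concrete example: narrowing to vq == 1 zeroes zregs[i].d[2] and
+ * above, and keeps only the low 16 bits of the first 64-bit word of each
+ * predicate (and of the FFR), so exactly one 128-bit vector's worth of
+ * state survives per register.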
+ */ +void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) +{ + int i, j; + uint64_t pmask; + + assert(vq >= 1 && vq <= ARM_MAX_VQ); + assert(vq <= arm_env_get_cpu(env)->sve_max_vq); + + /* Zap the high bits of the zregs. */ + for (i = 0; i < 32; i++) { + memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq)); + } + + /* Zap the high bits of the pregs and ffr. */ + pmask = 0; + if (vq & 3) { + pmask = ~(-1ULL << (16 * (vq & 3))); + } + for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) { + for (i = 0; i < 17; ++i) { + env->vfp.pregs[i].p[j] &= pmask; + } + pmask = 0; + } +} + +/* + * Notice a change in SVE vector size when changing EL. + */ +void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int old_len, new_len; + bool old_a64, new_a64; + + /* Nothing to do if no SVE. */ + if (!cpu_isar_feature(aa64_sve, cpu)) { + return; + } + + /* Nothing to do if FP is disabled in either EL. */ + if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) { + return; + } + + /* + * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped + * at ELx, or not available because the EL is in AArch32 state, then + * for all purposes other than a direct read, the ZCR_ELx.LEN field + * has an effective value of 0". + * + * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0). + * If we ignore aa32 state, we would fail to see the vq4->vq0 transition + * from EL2->EL1. Thus we go ahead and narrow when entering aa32 so that + * we already have the correct register contents when encountering the + * vq0->vq0 transition between EL0->EL1. + */ + old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; + old_len = (old_a64 && !sve_exception_el(env, old_el) + ? sve_zcr_len_for_el(env, old_el) : 0); + new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; + new_len = (new_a64 && !sve_exception_el(env, new_el) + ? sve_zcr_len_for_el(env, new_el) : 0); + + /* When changing vector length, clear inaccessible state. */ + if (new_len < old_len) { + aarch64_sve_narrow_vq(env, new_len + 1); + } +} +#endif diff --git a/target/arm/helper.h b/target/arm/helper.h index 59e8c3bd1b..8c9590091b 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -69,6 +69,8 @@ DEF_HELPER_2(v7m_blxns, void, env, i32) DEF_HELPER_3(v7m_tt, i32, env, i32, i32) +DEF_HELPER_2(v8m_stackcheck, void, env, i32) + DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32) DEF_HELPER_3(set_cp_reg, void, env, ptr, i32) DEF_HELPER_2(get_cp_reg, i32, env, ptr) diff --git a/target/arm/internals.h b/target/arm/internals.h index dc9357766c..6c2bb2deeb 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -94,6 +94,15 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */ #define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */ #define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */ +/** + * raise_exception: Raise the specified exception. + * Raise a guest exception with the specified value, syndrome register + * and target exception level. This should be called from helper functions, + * and never returns because we will longjump back up to the CPU main loop. + */ +void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el); + /* * For AArch64, map a given EL to an index in the banked_spsr array. 
* Note that this mapping and the AArch32 mapping defined in bank_number() @@ -136,7 +145,6 @@ static inline int bank_number(int mode) g_assert_not_reached(); } -void switch_mode(CPUARMState *, int); void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); @@ -270,14 +278,19 @@ enum arm_exception_class { #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT) +static inline uint32_t syn_get_ec(uint32_t syn) +{ + return syn >> ARM_EL_EC_SHIFT; +} + /* Utility functions for constructing various kinds of syndrome value. * Note that in general we follow the AArch64 syndrome values; in a * few cases the value in HSR for exceptions taken to AArch32 Hyp - * mode differs slightly, so if we ever implemented Hyp mode then the - * syndrome value would need some massaging on exception entry. - * (One example of this is that AArch64 defaults to IL bit set for - * exceptions which don't specifically indicate information about the - * trapping instruction, whereas AArch32 defaults to IL bit clear.) + * mode differs slightly, and we fix this up when populating HSR in + * arm_cpu_do_interrupt_aarch32_hyp(). + * The exception is FP/SIMD access traps -- these report extra information + * when taking an exception to AArch32. For those we include the extra coproc + * and TA fields, and mask them out when taking the exception to AArch64. */ static inline uint32_t syn_uncategorized(void) { @@ -377,9 +390,18 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit) { + /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) | (is_16bit ? 0 : ARM_EL_IL) - | (cv << 24) | (cond << 20); + | (cv << 24) | (cond << 20) | 0xa; +} + +static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) +{ + /* AArch32 SIMD trap: TA == 1 coproc == 0 */ + return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) + | (is_16bit ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (1 << 5); } static inline uint32_t syn_sve_access_trap(void) @@ -796,4 +818,57 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) } } +/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. + * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. + */ +#define MEMOPIDX_SHIFT 8 + +/** + * v7m_using_psp: Return true if using process stack pointer + * Return true if the CPU is currently using the process stack + * pointer, or false if it is using the main stack pointer. + */ +static inline bool v7m_using_psp(CPUARMState *env) +{ + /* Handler mode always uses the main stack; for thread mode + * the CONTROL.SPSEL bit determines the answer. + * Note that in v7M it is not possible to be in Handler mode with + * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both. + */ + return !arm_v7m_is_handler_mode(env) && + env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK; +} + +/** + * v7m_sp_limit: Return SP limit for current CPU state + * Return the SP limit value for the current CPU security state + * and stack pointer. 
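+ * That is, PSPLIM when the process stack pointer is in use and MSPLIM
+ * otherwise, banked by the current security state.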
+ */ +static inline uint32_t v7m_sp_limit(CPUARMState *env) +{ + if (v7m_using_psp(env)) { + return env->v7m.psplim[env->v7m.secure]; + } else { + return env->v7m.msplim[env->v7m.secure]; + } +} + +/** + * aarch32_mode_name(): Return name of the AArch32 CPU mode + * @psr: Program Status Register indicating CPU mode + * + * Returns, for debug logging purposes, a printable representation + * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by + * the low bits of the specified PSR. + */ +static inline const char *aarch32_mode_name(uint32_t psr) +{ + static const char cpu_mode_names[16][4] = { + "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", + "???", "???", "hyp", "und", "???", "???", "???", "sys" + }; + + return cpu_mode_names[psr & 0xf]; +} + #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 65f867d569..09a86e2820 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -34,6 +34,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { }; static bool cap_has_mp_state; +static bool cap_has_inject_serror_esr; static ARMHostCPUFeatures arm_host_cpu_features; @@ -48,6 +49,12 @@ int kvm_arm_vcpu_init(CPUState *cs) return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); } +void kvm_arm_init_serror_injection(CPUState *cs) +{ + cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state, + KVM_CAP_ARM_INJECT_SERROR_ESR); +} + bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, int *fdarray, struct kvm_vcpu_init *init) @@ -310,7 +317,7 @@ static int compare_u64(const void *a, const void *b) return 0; } -/* Initialize the CPUState's cpreg list according to the kernel's +/* Initialize the ARMCPU cpreg list according to the kernel's * definition of what CPU registers it knows about (and throw away * the previous TCG-created cpreg list). */ @@ -522,6 +529,59 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) return 0; } +int kvm_put_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + events.exception.serror_pending = env->serror.pending; + + /* Inject SError to guest with specified syndrome if host kernel + * supports it, otherwise inject SError without syndrome. 
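+ * cap_has_inject_serror_esr is probed once at vcpu init from
+ * KVM_CAP_ARM_INJECT_SERROR_ESR (see kvm_arm_init_serror_injection()).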
+ */ + if (cap_has_inject_serror_esr) { + events.exception.serror_has_esr = env->serror.has_esr; + events.exception.serror_esr = env->serror.esr; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to put vcpu events"); + } + + return ret; +} + +int kvm_get_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to get vcpu events"); + return ret; + } + + env->serror.pending = events.exception.serror_pending; + env->serror.has_esr = events.exception.serror_has_esr; + env->serror.esr = events.exception.serror_esr; + + return 0; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 4e91c11796..0f1e94c7b5 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -217,6 +217,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK; + /* Check whether userspace can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -358,6 +361,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + /* Note that we do not call write_cpustate_to_list() * here, so we are only writing the tuple list back to * KVM. This is safe because nothing can change the @@ -445,6 +453,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpscr(env, fpscr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e0b8246283..5de8ff0ac5 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -546,6 +546,9 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_arm_init_debug(cs); + /* Check whether user space can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -727,6 +730,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } @@ -863,6 +871,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpcr(env, fpr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } @@ -920,7 +933,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) { - int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT; + int hsr_ec = syn_get_ec(debug_exit->hsr); ARMCPU *cpu = ARM_CPU(cs); CPUClass *cc = CPU_GET_CLASS(cs); CPUARMState *env = &cpu->env; diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 863f205822..21c0129da2 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -50,9 +50,9 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, /** * kvm_arm_init_cpreg_list: - * @cs: CPUState + * @cpu: ARMCPU * - * Initialize the CPUState's cpreg list according to the kernel's + * Initialize the ARMCPU cpreg list according to the kernel's * definition of what CPU registers it knows about (and throw away * the previous TCG-created cpreg list). 
* @@ -121,6 +121,30 @@ bool write_kvmstate_to_list(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); +/** + * kvm_arm_init_serror_injection: + * @cs: CPUState + * + * Check whether KVM can set guest SError syndrome. + */ +void kvm_arm_init_serror_injection(CPUState *cs); + +/** + * kvm_get_vcpu_events: + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. + */ +int kvm_get_vcpu_events(ARMCPU *cpu); + +/** + * kvm_put_vcpu_events: + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. + */ +int kvm_put_vcpu_events(ARMCPU *cpu); + #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: diff --git a/target/arm/machine.c b/target/arm/machine.c index ff4ec22bf7..239fe4e84d 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -131,9 +131,8 @@ static const VMStateDescription vmstate_iwmmxt = { static bool sve_needed(void *opaque) { ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - return arm_feature(env, ARM_FEATURE_SVE); + return cpu_isar_feature(aa64_sve, cpu); } /* The first two words of each Zreg is stored in VFP state. */ @@ -172,6 +171,27 @@ static const VMStateDescription vmstate_sve = { }; #endif /* AARCH64 */ +static bool serror_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return env->serror.pending != 0; +} + +static const VMStateDescription vmstate_serror = { + .name = "cpu/serror", + .version_id = 1, + .minimum_version_id = 1, + .needed = serror_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.serror.pending, ARMCPU), + VMSTATE_UINT8(env.serror.has_esr, ARMCPU), + VMSTATE_UINT64(env.serror.esr, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -726,6 +746,7 @@ const VMStateDescription vmstate_arm_cpu = { #ifdef TARGET_AARCH64 &vmstate_sve, #endif + &vmstate_serror, NULL } }; diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 952b8d122b..90741f6331 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -28,8 +28,8 @@ #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) -static void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) +void raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -42,7 +42,7 @@ static void raise_exception(CPUARMState *env, uint32_t excp, * (see DDI0478C.a D1.10.4) */ target_el = 2; - if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) { + if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) { syndrome = syn_uncategorized(); } } @@ -238,6 +238,25 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, #endif /* !defined(CONFIG_USER_ONLY) */ +void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue) +{ + /* + * Perform the v8M stack limit check for SP updates from translated code, + * raising an exception if the limit is breached. + */ + if (newvalue < v7m_sp_limit(env)) { + CPUState *cs = CPU(arm_env_get_cpu(env)); + + /* + * Stack limit exceptions are a rare case, so rather than syncing + * PC/condbits before the call, we use cpu_restore_state() to + * get them right before raising the exception. 
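+ * The EXCP_STKOF raised here is delivered as a v8M UsageFault with
+ * CFSR.STKOF set (see arm_v7m_cpu_do_interrupt()).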
+ */ + cpu_restore_state(cs, GETPC(), true); + raise_exception(env, EXCP_STKOF, 0, 1); + } +} + uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b) { uint32_t res = a + b; @@ -1082,6 +1101,11 @@ void HELPER(exception_return)(CPUARMState *env) "AArch64 EL%d PC 0x%" PRIx64 "\n", cur_el, new_el, env->pc); } + /* + * Note that cur_el can never be 0. If new_el is 0, then + * el0_a64 is return_to_aa64, else el0_a64 is ignored. + */ + aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); qemu_mutex_lock_iothread(); arm_call_el_change_hook(arm_env_get_cpu(env)); diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 0f98097253..8cbc6516ab 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "internals.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -1688,6 +1689,47 @@ static void swap_memmove(void *vd, void *vs, size_t n) } } +/* Similarly for memset of 0. */ +static void swap_memzero(void *vd, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t o = (d | n) & 7; + size_t i; + + /* Usually, the first bit of a predicate is set, so N is 0. */ + if (likely(n == 0)) { + return; + } + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memset(vd, 0, n); + break; + + case 4: + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = 0; + } + break; + + case 2: + case 6: + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = 0; + } + break; + + default: + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = 0; + } + break; + } +} + void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t opr_sz = simd_oprsz(desc); @@ -3927,161 +3969,471 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) /* * Load contiguous data, protected by a governing predicate. */ -#define DO_LD1(NAME, FN, TYPEE, TYPEM, H) \ -static void do_##NAME(CPUARMState *env, void *vd, void *vg, \ - target_ulong addr, intptr_t oprsz, \ - uintptr_t ra) \ -{ \ - intptr_t i = 0; \ - do { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m = 0; \ - if (pg & 1) { \ - m = FN(env, addr, ra); \ - } \ - *(TYPEE *)(vd + H(i)) = m; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += sizeof(TYPEM); \ - } while (i & 15); \ - } while (i < oprsz); \ -} \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - do_##NAME(env, &env->vfp.zregs[simd_data(desc)], vg, \ - addr, simd_oprsz(desc), GETPC()); \ + +/* + * Load elements into @vd, controlled by @vg, from @host + @mem_ofs. + * Memory is valid through @host + @mem_max. The register element + * indicies are inferred from @mem_ofs, as modified by the types for + * which the helper is built. Return the @mem_ofs of the first element + * not loaded (which is @mem_max if they are all loaded). + * + * For softmmu, we have fully validated the guest page. For user-only, + * we cannot fully validate without taking the mmap lock, but since we + * know the access is within one host page, if any access is valid they + * all must be valid. However, when @vg is all false, it may be that + * no access is valid. + */ +typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host, + intptr_t mem_ofs, intptr_t mem_max); + +/* + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). + * The controlling predicate is known to be true. 
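+ * These per-element primitives are what the generic sve_ld*_r and
+ * sve_st*_r loops below call when they go through the cpu TLB one
+ * element at a time.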
+ */ +typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); +typedef sve_ld1_tlb_fn sve_st1_tlb_fn; + +/* + * Generate the above primitives. + */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ + intptr_t mem_off, const intptr_t mem_max) \ +{ \ + intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ + uint64_t *pg = vg; \ + while (mem_off + sizeof(TYPEM) <= mem_max) { \ + TYPEM val = 0; \ + if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ + val = HOST(host + mem_off); \ + } \ + *(TYPEE *)(vd + H(reg_off)) = val; \ + mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ + } \ + return mem_off; \ +} + +#ifdef CONFIG_SOFTMMU +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TYPEM val = TLB(env, addr, oi, ra); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ } +#else +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TYPEM val = HOST(g2h(addr)); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} +#endif -#define DO_LD2(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 2 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ + DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ + DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ + MOEND, helper_##end##_##PT##_mmu) + +DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) + +DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) + +DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) + +DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, 
lduw, lduw) +DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) + +DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) + +DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) + +#undef DO_LD_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_LD_PRIM_2 + +/* + * Skip through a sequence of inactive elements in the guarding predicate @vg, + * beginning at @reg_off bounded by @reg_max. Return the offset of the active + * element >= @reg_off, or @reg_max if there were no active elements at all. + */ +static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, + intptr_t reg_max, int esz) +{ + uint64_t pg_mask = pred_esz_masks[esz]; + uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63); + + /* In normal usage, the first element is active. */ + if (likely(pg & 1)) { + return reg_off; + } + + if (pg == 0) { + reg_off &= -64; + do { + reg_off += 64; + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false. */ + return reg_max; + } + pg = vg[reg_off >> 6] & pg_mask; + } while (pg == 0); + } + reg_off += ctz64(pg); + + /* We should never see an out of range predicate bit set. */ + tcg_debug_assert(reg_off < reg_max); + return reg_off; } -#define DO_LD3(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0, m3 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - *(TYPEE *)(d3 + H(i)) = m3; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 3 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +/* + * Return the maximum offset <= @mem_max which is still within the page + * referenced by @base + @mem_off. 
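+ * For example, if base + mem_off is 4 bytes short of a page boundary and
+ * more than 4 bytes remain before mem_max, the result is mem_off + 4, so
+ * callers stop their host-side loop at the page edge.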
+ */ +static intptr_t max_for_page(target_ulong base, intptr_t mem_off, + intptr_t mem_max) +{ + target_ulong addr = base + mem_off; + intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); + return MIN(split, mem_max - mem_off) + mem_off; } -#define DO_LD4(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0, m3 = 0, m4 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \ - m4 = FN(env, addr + 3 * sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - *(TYPEE *)(d3 + H(i)) = m3; \ - *(TYPEE *)(d4 + H(i)) = m4; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 4 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +static inline void set_helper_retaddr(uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + helper_retaddr = ra; +#endif } -DO_LD1(sve_ld1bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2) -DO_LD1(sve_ld1bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2) -DO_LD1(sve_ld1bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4) -DO_LD1(sve_ld1bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4) -DO_LD1(sve_ld1bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, ) -DO_LD1(sve_ld1bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, ) +/* + * The result of tlb_vaddr_to_host for user-only is just g2h(x), + * which is always non-null. Elide the useless test. + */ +static inline bool test_host_page(void *host) +{ +#ifdef CONFIG_USER_ONLY + return true; +#else + return likely(host != NULL); +#endif +} -DO_LD1(sve_ld1hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4) -DO_LD1(sve_ld1hss_r, cpu_ldsw_data_ra, uint32_t, int16_t, H1_4) -DO_LD1(sve_ld1hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, ) -DO_LD1(sve_ld1hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, ) +/* + * Common helper for all contiguous one-register predicated loads. + */ +static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + ARMVectorReg scratch; + void *host; + intptr_t split, reg_off, mem_off; + + /* Find the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. 
+ */ + split = max_for_page(addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + set_helper_retaddr(0); + /* After having taken any fault, zero leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } -DO_LD1(sve_ld1sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, ) -DO_LD1(sve_ld1sds_r, cpu_ldl_data_ra, uint64_t, int32_t, ) + /* + * Perform the predicated read into a temporary, thus ensuring + * if the load of the last element faults, Vd is not modified. + */ +#ifdef CONFIG_USER_ONLY + swap_memzero(&scratch, reg_off); + host_fn(&scratch, vg, g2h(addr), mem_off, mem_max); +#else + memset(&scratch, 0, reg_max); + goto start; + while (1) { + reg_off = find_next_active(vg, reg_off, reg_max, esz); + if (reg_off >= reg_max) { + break; + } + mem_off = reg_off >> diffsz; + split = max_for_page(addr, mem_off, mem_max); + + start: + if (split - mem_off >= (1 << msz)) { + /* At least one whole element on this page. */ + host = tlb_vaddr_to_host(env, addr + mem_off, + MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(&scratch, vg, host - mem_off, + mem_off, split); + reg_off = mem_off << diffsz; + continue; + } + } -DO_LD1(sve_ld1bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD2(sve_ld2bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD3(sve_ld3bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD4(sve_ld4bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) + /* + * Perform one normal read. This may fault, longjmping out to the + * main loop in order to raise an exception. It may succeed, and + * as a side-effect load the TLB entry for the next round. Finally, + * in the extremely unlikely case we're performing this operation + * on I/O memory, it may succeed but not bring in the TLB entry. + * But even then we have still made forward progress. 
+ */ + tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); + reg_off += 1 << esz; + } +#endif -DO_LD1(sve_ld1hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD2(sve_ld2hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD3(sve_ld3hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD4(sve_ld4hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) + set_helper_retaddr(0); + memcpy(vd, &scratch, reg_max); +} -DO_LD1(sve_ld1ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD2(sve_ld2ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD3(sve_ld3ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD4(sve_ld4ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_1(ld1bb, 0) +DO_LD1_1(ld1bhu, 1) +DO_LD1_1(ld1bhs, 1) +DO_LD1_1(ld1bsu, 2) +DO_LD1_1(ld1bss, 2) +DO_LD1_1(ld1bdu, 3) +DO_LD1_1(ld1bds, 3) -DO_LD1(sve_ld1dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD2(sve_ld2dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD3(sve_ld3dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD4(sve_ld4dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) +DO_LD1_2(ld1hh, 1, 1) +DO_LD1_2(ld1hsu, 2, 1) +DO_LD1_2(ld1hss, 2, 1) +DO_LD1_2(ld1hdu, 3, 1) +DO_LD1_2(ld1hds, 3, 1) -#undef DO_LD1 -#undef DO_LD2 -#undef DO_LD3 -#undef DO_LD4 +DO_LD1_2(ld1ss, 2, 2) +DO_LD1_2(ld1sdu, 3, 2) +DO_LD1_2(ld1sds, 3, 2) + +DO_LD1_2(ld1dd, 3, 3) + +#undef DO_LD1_1 +#undef DO_LD1_2 /* - * Load contiguous data, first-fault and no-fault. + * Common helpers for all contiguous 2,3,4-register predicated loads. */ +static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[2] = { }; -#ifdef CONFIG_USER_ONLY + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + } + i += size, pg >>= size; + addr += 2 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. 
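+ * All loads above went into the local scratch registers, so a fault
+ * part way through leaves the architectural zregs untouched.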
*/ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); +} + +static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[3] = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + } + i += size, pg >>= size; + addr += 3 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); +} + +static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[4] = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + } + i += size, pg >>= size; + addr += 4 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); +} + +#define DO_LDN_1(N) \ +void __attribute__((flatten)) HELPER(sve_ld##N##bb_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +} + +#define DO_LDN_2(N, SUFF, SIZE) \ +void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_le_tlb); \ +} \ +void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_be_tlb); \ +} + +DO_LDN_1(2) +DO_LDN_1(3) +DO_LDN_1(4) + +DO_LDN_2(2, hh, 2) +DO_LDN_2(3, hh, 2) +DO_LDN_2(4, hh, 2) + +DO_LDN_2(2, ss, 4) +DO_LDN_2(3, ss, 4) +DO_LDN_2(4, ss, 4) + +DO_LDN_2(2, dd, 8) +DO_LDN_2(3, dd, 8) +DO_LDN_2(4, dd, 8) + +#undef DO_LDN_1 +#undef DO_LDN_2 + +/* + * Load contiguous data, first-fault and no-fault. + * + * For user-only, one could argue that we should hold the mmap_lock during + * the operation so that there is no race between page_check_range and the + * load operation. However, unmapping pages out from under a running thread + * is extraordinarily unlikely. 
This theoretical race condition also affects + * linux-user/ in its get_user/put_user macros. + * + * TODO: Construct some helpers, written in assembly, that interact with + * handle_cpu_signal to produce memory ops which can properly report errors + * without racing. + */ /* Fault on byte I. All bits in FFR from I are cleared. The vector * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE @@ -4100,573 +4452,932 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } } -/* Hold the mmap lock during the operation so that there is no race - * between page_check_range and the load operation. We expect the - * usual case to have no faults at all, so we check the whole range - * first and if successful defer to the normal load operation. - * - * TODO: Change mmap_lock to a rwlock so that multiple readers - * can run simultaneously. This will probably help other uses - * within QEMU as well. +/* + * Common helper for all contiguous first-fault loads. */ -#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \ -static void do_sve_ldff1##PART(CPUARMState *env, void *vd, void *vg, \ - target_ulong addr, intptr_t oprsz, \ - bool first, uintptr_t ra) \ -{ \ - intptr_t i = 0; \ - do { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m = 0; \ - if (pg & 1) { \ - if (!first && \ - unlikely(page_check_range(addr, sizeof(TYPEM), \ - PAGE_READ))) { \ - record_fault(env, i, oprsz); \ - return; \ - } \ - m = FN(env, addr, ra); \ - first = false; \ - } \ - *(TYPEE *)(vd + H(i)) = m; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += sizeof(TYPEM); \ - } while (i & 15); \ - } while (i < oprsz); \ -} \ -void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - mmap_lock(); \ - if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \ - do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \ - } else { \ - do_sve_ldff1##PART(env, vd, vg, addr, oprsz, true, GETPC()); \ - } \ - mmap_unlock(); \ -} +static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + intptr_t split, reg_off, mem_off; + void *host; + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. 
+ */ + split = max_for_page(addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + set_helper_retaddr(0); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } -/* No-fault loads are like first-fault loads without the - * first faulting special case. - */ -#define DO_LDNF1(PART) \ -void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - mmap_lock(); \ - if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \ - do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \ - } else { \ - do_sve_ldff1##PART(env, vd, vg, addr, oprsz, false, GETPC()); \ - } \ - mmap_unlock(); \ -} +#ifdef CONFIG_USER_ONLY + /* + * The page(s) containing this first element at ADDR+MEM_OFF must + * be valid. Considering that this first element may be misaligned + * and cross a page boundary itself, take the rest of the page from + * the last byte of the element. + */ + split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); + mem_off = host_fn(vd, vg, g2h(addr), mem_off, split); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + reg_off = mem_off << diffsz; #else + /* + * Perform one normal read, which will fault or not. + * But it is likely to bring the page into the tlb. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); + + /* After any fault, zero any leading predicated false elts. */ + swap_memzero(vd, reg_off); + mem_off += 1 << msz; + reg_off += 1 << esz; + + /* Try again to read the balance of the page. */ + split = max_for_page(addr, mem_off - 1, mem_max); + if (split >= (1 << msz)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + } +#endif -/* TODO: System mode is not yet supported. - * This would probably use tlb_vaddr_to_host. - */ -#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \ -void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - g_assert_not_reached(); \ + set_helper_retaddr(0); + record_fault(env, reg_off, reg_max); } -#define DO_LDNF1(PART) \ -void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - g_assert_not_reached(); \ -} +/* + * Common helper for all contiguous no-fault loads. + */ +static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const int esz, const int msz, + sve_ld1_host_fn *host_fn) +{ + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t split, reg_off, mem_off; + void *host; +#ifdef CONFIG_USER_ONLY + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); + if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) { + /* The entire operation is valid and will not fault. 
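+ * (page_check_range() has just validated the whole [addr, addr + mem_max)
+ * range for reading.)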
*/ + host_fn(vd, vg, host, 0, mem_max); + return; + } #endif -DO_LDFF1(bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LDFF1(bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2) -DO_LDFF1(bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2) -DO_LDFF1(bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4) -DO_LDFF1(bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4) -DO_LDFF1(bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, ) -DO_LDFF1(bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, ) + /* There will be no fault, so we may modify in advance. */ + memset(vd, 0, reg_max); -DO_LDFF1(hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LDFF1(hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4) -DO_LDFF1(hss_r, cpu_ldsw_data_ra, uint32_t, int8_t, H1_4) -DO_LDFF1(hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, ) -DO_LDFF1(hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, ) + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + return; + } + mem_off = reg_off >> diffsz; -DO_LDFF1(ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LDFF1(sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, ) -DO_LDFF1(sds_r, cpu_ldl_data_ra, uint64_t, int32_t, ) +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) { + /* At least one load is valid; take the rest of the page. */ + split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); + mem_off = host_fn(vd, vg, host, mem_off, split); + reg_off = mem_off << diffsz; + } +#else + /* + * If the address is not in the TLB, we have no way to bring the + * entry into the TLB without also risking a fault. Note that + * the corollary is that we never load from an address not in RAM. + * + * This last is out of spec, in a weird corner case. + * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory + * must not actually hit the bus -- it returns UNKNOWN data instead. + * But if you map non-RAM with Normal memory attributes and do a NF + * load then it should access the bus. (Nobody ought actually do this + * in the real world, obviously.) + * + * Then there are the annoying special cases with watchpoints... + * + * TODO: Add a form of tlb_fill that does not raise an exception, + * with a form of tlb_vaddr_to_host and a set of loads to match. + * The non_fault_vaddr_to_host would handle everything, usually, + * and the loads would handle the iomem path for watchpoints. 
+ */ + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + split = max_for_page(addr, mem_off, mem_max); + if (host && split >= (1 << msz)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } +#endif -DO_LDFF1(dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) + record_fault(env, reg_off, reg_max); +} -#undef DO_LDFF1 +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ +} -DO_LDNF1(bb_r) -DO_LDNF1(bhu_r) -DO_LDNF1(bhs_r) -DO_LDNF1(bsu_r) -DO_LDNF1(bss_r) -DO_LDNF1(bdu_r) -DO_LDNF1(bds_r) +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ +} \ +void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ +} + +DO_LDFF1_LDNF1_1(bb, 0) +DO_LDFF1_LDNF1_1(bhu, 1) +DO_LDFF1_LDNF1_1(bhs, 1) +DO_LDFF1_LDNF1_1(bsu, 2) +DO_LDFF1_LDNF1_1(bss, 2) +DO_LDFF1_LDNF1_1(bdu, 3) +DO_LDFF1_LDNF1_1(bds, 3) -DO_LDNF1(hh_r) -DO_LDNF1(hsu_r) -DO_LDNF1(hss_r) -DO_LDNF1(hdu_r) -DO_LDNF1(hds_r) +DO_LDFF1_LDNF1_2(hh, 1, 1) +DO_LDFF1_LDNF1_2(hsu, 2, 1) +DO_LDFF1_LDNF1_2(hss, 2, 1) +DO_LDFF1_LDNF1_2(hdu, 3, 1) +DO_LDFF1_LDNF1_2(hds, 3, 1) -DO_LDNF1(ss_r) -DO_LDNF1(sdu_r) -DO_LDNF1(sds_r) +DO_LDFF1_LDNF1_2(ss, 2, 2) +DO_LDFF1_LDNF1_2(sdu, 3, 2) +DO_LDFF1_LDNF1_2(sds, 3, 2) -DO_LDNF1(dd_r) +DO_LDFF1_LDNF1_2(dd, 3, 3) -#undef DO_LDNF1 +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 /* * Store contiguous data, protected by a governing predicate. 
*/ -#define DO_ST1(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m = *(TYPEE *)(vd + H(i)); \ - FN(env, addr, m, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} -#define DO_ST1_D(NAME, FN, TYPEM) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc) / 8; \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - uint64_t *d = &env->vfp.zregs[rd].d[0]; \ - uint8_t *pg = vg; \ - for (i = 0; i < oprsz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - FN(env, addr, d[i], ra); \ - } \ - addr += sizeof(TYPEM); \ - } \ +#ifdef CONFIG_SOFTMMU +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \ } - -#define DO_ST2(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 2 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +#else +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \ } +#endif -#define DO_ST3(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - TYPEM m3 = *(TYPEE *)(d3 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 3 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} +DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) -#define DO_ST4(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = 
&env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - TYPEM m3 = *(TYPEE *)(d3 + H(i)); \ - TYPEM m4 = *(TYPEE *)(d4 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \ - FN(env, addr + 3 * sizeof(TYPEM), m4, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 4 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} +DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2) -DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4) -DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t) +DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) +DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4) -DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t) +DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) -DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t) +DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) +DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) +DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) -DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) +#undef DO_ST_TLB -DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t) +/* + * Common helpers for all contiguous 1,2,3,4-register predicated stores. 
+ */ +static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *vd = &env->vfp.zregs[rd]; -void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg, - target_ulong addr, uint32_t desc) + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, vd, i, addr, oi, ra); + } + i += esize, pg >>= esize; + addr += msize; + } while (i & 15); + } + set_helper_retaddr(0); +} + +static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) { - intptr_t i, oprsz = simd_oprsz(desc) / 8; - intptr_t ra = GETPC(); - unsigned rd = simd_data(desc); - uint64_t *d1 = &env->vfp.zregs[rd].d[0]; - uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0]; - uint8_t *pg = vg; + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - for (i = 0; i < oprsz; i += 1) { - if (pg[H1(i)] & 1) { - cpu_stq_data_ra(env, addr, d1[i], ra); - cpu_stq_data_ra(env, addr + 8, d2[i], ra); - } - addr += 2 * 8; + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 2 * msize; + } while (i & 15); } + set_helper_retaddr(0); } -void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg, - target_ulong addr, uint32_t desc) +static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) { - intptr_t i, oprsz = simd_oprsz(desc) / 8; - intptr_t ra = GETPC(); - unsigned rd = simd_data(desc); - uint64_t *d1 = &env->vfp.zregs[rd].d[0]; - uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0]; - uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0]; - uint8_t *pg = vg; + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - for (i = 0; i < oprsz; i += 1) { - if (pg[H1(i)] & 1) { - cpu_stq_data_ra(env, addr, d1[i], ra); - cpu_stq_data_ra(env, addr + 8, d2[i], ra); - cpu_stq_data_ra(env, addr + 16, d3[i], ra); - } - addr += 3 * 8; + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 3 * msize; + } while (i & 15); } + set_helper_retaddr(0); } -void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg, - target_ulong addr, uint32_t desc) +static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + 
sve_st1_tlb_fn *tlb_fn) { - intptr_t i, oprsz = simd_oprsz(desc) / 8; - intptr_t ra = GETPC(); - unsigned rd = simd_data(desc); - uint64_t *d1 = &env->vfp.zregs[rd].d[0]; - uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0]; - uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0]; - uint64_t *d4 = &env->vfp.zregs[(rd + 3) & 31].d[0]; - uint8_t *pg = vg; + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + void *d4 = &env->vfp.zregs[(rd + 3) & 31]; - for (i = 0; i < oprsz; i += 1) { - if (pg[H1(i)] & 1) { - cpu_stq_data_ra(env, addr, d1[i], ra); - cpu_stq_data_ra(env, addr + 8, d2[i], ra); - cpu_stq_data_ra(env, addr + 16, d3[i], ra); - cpu_stq_data_ra(env, addr + 24, d4[i], ra); - } - addr += 4 * 8; + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 4 * msize; + } while (i & 15); } + set_helper_retaddr(0); } -/* Loads with a vector index. */ +#define DO_STN_1(N, NAME, ESIZE) \ +void __attribute__((flatten)) HELPER(sve_st##N##NAME##_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ + sve_st1##NAME##_tlb); \ +} -#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m = 0; \ - if (pg & 1) { \ - target_ulong off = *(TYPEI *)(vm + H1_4(i)); \ - m = FN(env, base + (off << scale), ra); \ - } \ - *(uint32_t *)(vd + H1_4(i)) = m; \ - i += 4, pg >>= 4; \ - } while (i & 15); \ - } \ +#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ +void __attribute__((flatten)) HELPER(sve_st##N##NAME##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_le_tlb); \ +} \ +void __attribute__((flatten)) HELPER(sve_st##N##NAME##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_be_tlb); \ +} + +DO_STN_1(1, bb, 1) +DO_STN_1(1, bh, 2) +DO_STN_1(1, bs, 4) +DO_STN_1(1, bd, 8) +DO_STN_1(2, bb, 1) +DO_STN_1(3, bb, 1) +DO_STN_1(4, bb, 1) + +DO_STN_2(1, hh, 2, 2) +DO_STN_2(1, hs, 4, 2) +DO_STN_2(1, hd, 8, 2) +DO_STN_2(2, hh, 2, 2) +DO_STN_2(3, hh, 2, 2) +DO_STN_2(4, hh, 2, 2) + +DO_STN_2(1, ss, 4, 4) +DO_STN_2(1, sd, 8, 4) +DO_STN_2(2, ss, 4, 4) +DO_STN_2(3, ss, 4, 4) +DO_STN_2(4, ss, 4, 4) + +DO_STN_2(1, dd, 8, 8) +DO_STN_2(2, dd, 8, 8) +DO_STN_2(3, dd, 8, 8) +DO_STN_2(4, dd, 8, 8) + +#undef DO_STN_1 +#undef DO_STN_2 + +/* + * Loads with a vector index. + */ + +/* + * Load the element at @reg + @reg_ofs, sign or zero-extend as needed. 
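
Note the two size arguments in the DO_STN tables: ESIZE is the element width in the vector register, MSIZE the width written to memory, so an entry like DO_STN_1(1, bd, 8) describes a truncating store (64-bit .D elements written out as single bytes). A small sketch of that esize/msize split, simplifying the predicate to one bool per element:

    #include <stdint.h>
    #include <stdbool.h>

    /* ST1B to a .D vector: 8-byte register elements (esize), 1-byte memory
     * accesses (msize); only the low byte of each active element is stored. */
    static void st1bd_like(uint8_t *mem, const uint64_t *zd,
                           const bool *active, int nelem)
    {
        for (int i = 0; i < nelem; i++) {
            if (active[i]) {
                mem[i] = (uint8_t)zd[i];
            }
        }
    }
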
+ */ +typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); + +static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) +{ + return *(uint32_t *)(reg + H1_4(reg_ofs)); } -#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc) / 8; \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \ - for (i = 0; i < oprsz; i++) { \ - TYPEM mm = 0; \ - if (pg[H1(i)] & 1) { \ - target_ulong off = (TYPEI)m[i]; \ - mm = FN(env, base + (off << scale), ra); \ - } \ - d[i] = mm; \ - } \ +static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) +{ + return *(int32_t *)(reg + H1_4(reg_ofs)); } -DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra) - -DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t, cpu_lduw_data_ra) - -DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra) - -DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t, cpu_ldl_data_ra) - -DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t, cpu_ldl_data_ra) +static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) +{ + return (uint32_t)*(uint64_t *)(reg + reg_ofs); +} -/* First fault loads with a vector index. 
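
The off_* extractors differ only in how the per-element offset is widened: the *_s forms read 32-bit vector elements and the *_d forms 64-bit ones, with the zsu/zss variants keeping just the low 32 bits (zero- or sign-extended) and zd using the full 64-bit offset. A two-line illustration of the zero- versus sign-extension difference:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        uint32_t off = 0xfffffff0u;                      /* "-16" held in a 32-bit element */
        uint64_t zsu = (uint64_t)off;                    /* zero-extended: 0x00000000fffffff0 */
        uint64_t zss = (uint64_t)(int64_t)(int32_t)off;  /* sign-extended: 0xfffffffffffffff0 */
        assert(zsu == 0xfffffff0ull && zss == 0xfffffffffffffff0ull);
        return 0;
    }
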
*/ +static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) +{ + return (int32_t)*(uint64_t *)(reg + reg_ofs); +} -#ifdef CONFIG_USER_ONLY +static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) +{ + return *(uint64_t *)(reg + reg_ofs); +} -#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - bool first = true; \ - mmap_lock(); \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m = 0; \ - if (pg & 1) { \ - target_ulong off = *(TYPEI *)(vm + H(i)); \ - target_ulong addr = base + (off << scale); \ - if (!first && \ - page_check_range(addr, sizeof(TYPEM), PAGE_READ)) { \ - record_fault(env, i, oprsz); \ - goto exit; \ - } \ - m = FN(env, addr, ra); \ - first = false; \ - } \ - *(TYPEE *)(vd + H(i)) = m; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - } while (i & 15); \ - } \ - exit: \ - mmap_unlock(); \ +static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz); } +static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + ARMVectorReg scratch = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)(vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); + } + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. 
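
Both gather helpers accumulate into an on-stack ARMVectorReg scratch and copy it to the destination only after every active element has been loaded, so a fault taken part-way through leaves the architectural register untouched. The same load-into-scratch-then-commit shape in plain C (faulting_load is a hypothetical stand-in for the *_tlb callback, which may not return):

    #include <stdint.h>
    #include <stdbool.h>
    #include <string.h>

    extern uint32_t faulting_load(uint64_t addr);   /* may raise an exception */

    static void gather_u32(uint32_t *dst, const uint64_t *addr,
                           const bool *active, int nelem)
    {
        uint32_t scratch[64] = { 0 };               /* inactive elements read as zero;
                                                       assumes nelem <= 64 */
        for (int i = 0; i < nelem; i++) {
            if (active[i]) {
                scratch[i] = faulting_load(addr[i]);
            }
        }
        /* Reached only if no element faulted: commit the result in one go. */
        memcpy(dst, scratch, nelem * sizeof(uint32_t));
    }
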
*/ + memcpy(vd, &scratch, oprsz * 8); +} + +#define DO_LD1_ZPZ_S(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb); \ +} + +#define DO_LD1_ZPZ_D(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu) +DO_LD1_ZPZ_S(bsu, zss) +DO_LD1_ZPZ_D(bdu, zsu) +DO_LD1_ZPZ_D(bdu, zss) +DO_LD1_ZPZ_D(bdu, zd) + +DO_LD1_ZPZ_S(bss, zsu) +DO_LD1_ZPZ_S(bss, zss) +DO_LD1_ZPZ_D(bds, zsu) +DO_LD1_ZPZ_D(bds, zss) +DO_LD1_ZPZ_D(bds, zd) + +DO_LD1_ZPZ_S(hsu_le, zsu) +DO_LD1_ZPZ_S(hsu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zsu) +DO_LD1_ZPZ_D(hdu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zd) + +DO_LD1_ZPZ_S(hsu_be, zsu) +DO_LD1_ZPZ_S(hsu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zsu) +DO_LD1_ZPZ_D(hdu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zd) + +DO_LD1_ZPZ_S(hss_le, zsu) +DO_LD1_ZPZ_S(hss_le, zss) +DO_LD1_ZPZ_D(hds_le, zsu) +DO_LD1_ZPZ_D(hds_le, zss) +DO_LD1_ZPZ_D(hds_le, zd) + +DO_LD1_ZPZ_S(hss_be, zsu) +DO_LD1_ZPZ_S(hss_be, zss) +DO_LD1_ZPZ_D(hds_be, zsu) +DO_LD1_ZPZ_D(hds_be, zss) +DO_LD1_ZPZ_D(hds_be, zd) + +DO_LD1_ZPZ_S(ss_le, zsu) +DO_LD1_ZPZ_S(ss_le, zss) +DO_LD1_ZPZ_D(sdu_le, zsu) +DO_LD1_ZPZ_D(sdu_le, zss) +DO_LD1_ZPZ_D(sdu_le, zd) + +DO_LD1_ZPZ_S(ss_be, zsu) +DO_LD1_ZPZ_S(ss_be, zss) +DO_LD1_ZPZ_D(sdu_be, zsu) +DO_LD1_ZPZ_D(sdu_be, zss) +DO_LD1_ZPZ_D(sdu_be, zd) + +DO_LD1_ZPZ_D(sds_le, zsu) +DO_LD1_ZPZ_D(sds_le, zss) +DO_LD1_ZPZ_D(sds_le, zd) + +DO_LD1_ZPZ_D(sds_be, zsu) +DO_LD1_ZPZ_D(sds_be, zss) +DO_LD1_ZPZ_D(sds_be, zd) + +DO_LD1_ZPZ_D(dd_le, zsu) +DO_LD1_ZPZ_D(dd_le, zss) +DO_LD1_ZPZ_D(dd_le, zd) + +DO_LD1_ZPZ_D(dd_be, zsu) +DO_LD1_ZPZ_D(dd_be, zss) +DO_LD1_ZPZ_D(dd_be, zd) + +#undef DO_LD1_ZPZ_S +#undef DO_LD1_ZPZ_D + +/* First fault loads with a vector index. */ + +/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. + * The controlling predicate is known to be true. Return true if the + * load was successful. 
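
Each DO_LD1_ZPZ_S/DO_LD1_ZPZ_D invocation above stamps out one flattened entry point per (memory type, offset type) pair; for instance DO_LD1_ZPZ_S(bsu, zsu) expands to roughly the following (HELPER(x) being QEMU's helper_##x):

    void __attribute__((flatten)) helper_sve_ldbsu_zsu(CPUARMState *env, void *vd,
                                                       void *vg, void *vm,
                                                       target_ulong base,
                                                       uint32_t desc)
    {
        /* byte loads zero-extended into 32-bit elements, 32-bit unsigned offsets */
        sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(),
                   off_zsu_s, sve_ld1bsu_tlb);
    }
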
+ */ +typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, int mmu_idx); + +#ifdef CONFIG_SOFTMMU +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} #else +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ))) { \ + TYPEM val = HOST(g2h(addr)); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ + return true; \ + } \ + return false; \ +} +#endif -#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - g_assert_not_reached(); \ +DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) +DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) +DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) +DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) + +DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) +DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) +DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) +DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) +DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) +DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) +DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) +DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) + +DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) +DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) +DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) +DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) +DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) +DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) + +DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) +DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) + +/* + * Common helper for all gather first-faulting loads. + */ +static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_32); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + set_helper_retaddr(0); + } + + /* After any fault, zero the leading predicated false elements. 
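
A first-fault gather therefore runs in two phases: the first active element takes the normal, potentially faulting load (that fault must be delivered architecturally), and every later active element uses the non-faulting probe above; when a probe fails, record_fault() notes the offending element in the first-fault register and the helper simply stops. A compact restatement of that control flow, where load_may_fault, load_no_fault and mark_first_fault are hypothetical stand-ins for the real tlb_fn, nonfault_fn and record_fault:

    #include <stdint.h>
    #include <stdbool.h>

    extern uint32_t load_may_fault(uint64_t addr);              /* may trap       */
    extern bool     load_no_fault(uint64_t addr, uint32_t *v);  /* never traps    */
    extern void     mark_first_fault(int elem, int nelem);      /* ~ record_fault */

    static void ldff_gather_u32(uint32_t *dst, const uint64_t *addr,
                                const bool *active, int nelem)
    {
        int i = 0;

        while (i < nelem && !active[i]) {
            dst[i++] = 0;                       /* leading inactive elements: zero */
        }
        if (i < nelem) {
            dst[i] = load_may_fault(addr[i]);   /* first active element: real load */
            i++;
        }
        for (; i < nelem; i++) {
            if (!active[i]) {
                dst[i] = 0;
            } else if (!load_no_fault(addr[i], &dst[i])) {
                mark_first_fault(i, nelem);     /* later fault: record and stop */
                break;
            }
        }
    }
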
*/ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 4) < reg_max)) { + uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8); + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint32_t *)(vd + H1_4(reg_off)) = 0; + } + } } -#endif +static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_64); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + set_helper_retaddr(0); + } -#define DO_LDFF1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \ - DO_LDFF1_ZPZ(NAME, uint32_t, TYPEI, TYPEM, FN, H1_4) -#define DO_LDFF1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \ - DO_LDFF1_ZPZ(NAME, uint64_t, TYPEI, TYPEM, FN, ) - -DO_LDFF1_ZPZ_S(sve_ldffbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra) - -DO_LDFF1_ZPZ_S(sve_ldffbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffssu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffbss_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_S(sve_ldffhss_zss, int32_t, int16_t, cpu_lduw_data_ra) - -DO_LDFF1_ZPZ_D(sve_ldffbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra) - -DO_LDFF1_ZPZ_D(sve_ldffbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffddu_zss, int32_t, uint64_t, cpu_ldq_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffbds_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhds_zss, int32_t, int16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsds_zss, int32_t, int32_t, cpu_ldl_data_ra) - -DO_LDFF1_ZPZ_D(sve_ldffbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffbds_zd, uint64_t, int8_t, cpu_ldub_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffhds_zd, 
uint64_t, int16_t, cpu_lduw_data_ra) -DO_LDFF1_ZPZ_D(sve_ldffsds_zd, uint64_t, int32_t, cpu_ldl_data_ra) + /* After any fault, zero the leading predicated false elements. */ + swap_memzero(vd, reg_off); -/* Stores with a vector index. */ + while (likely((reg_off += 8) < reg_max)) { + uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3)); + if (likely(pg & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint64_t *)(vd + reg_off) = 0; + } + } +} -#define DO_ST1_ZPZ_S(NAME, TYPEI, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ +#define DO_LDFF1_ZPZ_S(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ { \ - intptr_t i, oprsz = simd_oprsz(desc); \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (likely(pg & 1)) { \ - target_ulong off = *(TYPEI *)(vm + H1_4(i)); \ - uint32_t d = *(uint32_t *)(vd + H1_4(i)); \ - FN(env, base + (off << scale), d, ra); \ - } \ - i += sizeof(uint32_t), pg >>= sizeof(uint32_t); \ - } while (i & 15); \ - } \ + sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ } -#define DO_ST1_ZPZ_D(NAME, TYPEI, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ +#define DO_LDFF1_ZPZ_D(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ { \ - intptr_t i, oprsz = simd_oprsz(desc) / 8; \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \ - for (i = 0; i < oprsz; i++) { \ - if (likely(pg[H1(i)] & 1)) { \ - target_ulong off = (target_ulong)(TYPEI)m[i] << scale; \ - FN(env, base + off, d[i], ra); \ - } \ - } \ -} + sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +DO_LDFF1_ZPZ_S(bsu, zsu) +DO_LDFF1_ZPZ_S(bsu, zss) +DO_LDFF1_ZPZ_D(bdu, zsu) +DO_LDFF1_ZPZ_D(bdu, zss) +DO_LDFF1_ZPZ_D(bdu, zd) + +DO_LDFF1_ZPZ_S(bss, zsu) +DO_LDFF1_ZPZ_S(bss, zss) +DO_LDFF1_ZPZ_D(bds, zsu) +DO_LDFF1_ZPZ_D(bds, zss) +DO_LDFF1_ZPZ_D(bds, zd) + +DO_LDFF1_ZPZ_S(hsu_le, zsu) +DO_LDFF1_ZPZ_S(hsu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zsu) +DO_LDFF1_ZPZ_D(hdu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zd) + +DO_LDFF1_ZPZ_S(hsu_be, zsu) +DO_LDFF1_ZPZ_S(hsu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zsu) +DO_LDFF1_ZPZ_D(hdu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zd) + +DO_LDFF1_ZPZ_S(hss_le, zsu) +DO_LDFF1_ZPZ_S(hss_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zsu) +DO_LDFF1_ZPZ_D(hds_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zd) + +DO_LDFF1_ZPZ_S(hss_be, zsu) +DO_LDFF1_ZPZ_S(hss_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zsu) +DO_LDFF1_ZPZ_D(hds_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zd) + +DO_LDFF1_ZPZ_S(ss_le, zsu) +DO_LDFF1_ZPZ_S(ss_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zsu) +DO_LDFF1_ZPZ_D(sdu_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zd) + +DO_LDFF1_ZPZ_S(ss_be, zsu) +DO_LDFF1_ZPZ_S(ss_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zsu) +DO_LDFF1_ZPZ_D(sdu_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zd) + +DO_LDFF1_ZPZ_D(sds_le, zsu) +DO_LDFF1_ZPZ_D(sds_le, zss) +DO_LDFF1_ZPZ_D(sds_le, zd) + +DO_LDFF1_ZPZ_D(sds_be, zsu) +DO_LDFF1_ZPZ_D(sds_be, zss) +DO_LDFF1_ZPZ_D(sds_be, zd) + 
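
Splitting every helper into _le/_be variants, here and in the contiguous tables, means the guest data endianness is decided once at translate time, when the helper and its element callback are selected, rather than being re-examined for every element inside the loop. The pattern in miniature:

    #include <stdint.h>

    typedef uint16_t load16_fn(const uint8_t *p);

    static uint16_t load16_le(const uint8_t *p) { return p[0] | (p[1] << 8); }
    static uint16_t load16_be(const uint8_t *p) { return (p[0] << 8) | p[1]; }

    /* Pick the element accessor once, outside the hot loop. */
    static void copy_h(uint16_t *dst, const uint8_t *src, int n, int big_endian)
    {
        load16_fn *ld = big_endian ? load16_be : load16_le;
        for (int i = 0; i < n; i++) {
            dst[i] = ld(src + 2 * i);
        }
    }
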
+DO_LDFF1_ZPZ_D(dd_le, zsu) +DO_LDFF1_ZPZ_D(dd_le, zss) +DO_LDFF1_ZPZ_D(dd_le, zd) + +DO_LDFF1_ZPZ_D(dd_be, zsu) +DO_LDFF1_ZPZ_D(dd_be, zss) +DO_LDFF1_ZPZ_D(dd_be, zd) -DO_ST1_ZPZ_S(sve_stbs_zsu, uint32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_S(sve_sths_zsu, uint32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_S(sve_stss_zsu, uint32_t, cpu_stl_data_ra) +/* Stores with a vector index. */ -DO_ST1_ZPZ_S(sve_stbs_zss, int32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_S(sve_sths_zss, int32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_S(sve_stss_zss, int32_t, cpu_stl_data_ra) +static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); -DO_ST1_ZPZ_D(sve_stbd_zsu, uint32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zsu, uint32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zsu, uint32_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zsu, uint32_t, cpu_stq_data_ra) + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, vd, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + set_helper_retaddr(0); +} -DO_ST1_ZPZ_D(sve_stbd_zss, int32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zss, int32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zss, int32_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zss, int32_t, cpu_stq_data_ra) +static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; -DO_ST1_ZPZ_D(sve_stbd_zd, uint64_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zd, uint64_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zd, uint64_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zd, uint64_t, cpu_stq_data_ra) + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)(vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); + } + } + set_helper_retaddr(0); +} + +#define DO_ST1_ZPZ_S(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_st1##MEM##_tlb); \ +} + +#define DO_ST1_ZPZ_D(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_st1##MEM##_tlb); \ +} + +DO_ST1_ZPZ_S(bs, zsu) +DO_ST1_ZPZ_S(hs_le, zsu) +DO_ST1_ZPZ_S(hs_be, zsu) +DO_ST1_ZPZ_S(ss_le, zsu) +DO_ST1_ZPZ_S(ss_be, zsu) + +DO_ST1_ZPZ_S(bs, zss) +DO_ST1_ZPZ_S(hs_le, zss) +DO_ST1_ZPZ_S(hs_be, zss) +DO_ST1_ZPZ_S(ss_le, zss) +DO_ST1_ZPZ_S(ss_be, zss) + +DO_ST1_ZPZ_D(bd, zsu) +DO_ST1_ZPZ_D(hd_le, zsu) +DO_ST1_ZPZ_D(hd_be, zsu) +DO_ST1_ZPZ_D(sd_le, zsu) +DO_ST1_ZPZ_D(sd_be, zsu) +DO_ST1_ZPZ_D(dd_le, zsu) +DO_ST1_ZPZ_D(dd_be, zsu) + +DO_ST1_ZPZ_D(bd, zss) +DO_ST1_ZPZ_D(hd_le, zss) +DO_ST1_ZPZ_D(hd_be, 
zss) +DO_ST1_ZPZ_D(sd_le, zss) +DO_ST1_ZPZ_D(sd_be, zss) +DO_ST1_ZPZ_D(dd_le, zss) +DO_ST1_ZPZ_D(dd_be, zss) + +DO_ST1_ZPZ_D(bd, zd) +DO_ST1_ZPZ_D(hd_le, zd) +DO_ST1_ZPZ_D(hd_be, zd) +DO_ST1_ZPZ_D(sd_le, zd) +DO_ST1_ZPZ_D(sd_be, zd) +DO_ST1_ZPZ_D(dd_le, zd) +DO_ST1_ZPZ_D(dd_be, zd) + +#undef DO_ST1_ZPZ_S +#undef DO_ST1_ZPZ_D diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 8ca3876707..88195ab949 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -37,6 +37,7 @@ #include "trace-tcg.h" #include "translate-a64.h" +#include "qemu/atomic128.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; @@ -166,11 +167,15 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, cpu_fprintf(f, "\n"); return; } + if (fp_exception_el(env, el) != 0) { + cpu_fprintf(f, " FPU disabled\n"); + return; + } cpu_fprintf(f, " FPCR=%08x FPSR=%08x\n", vfp_get_fpcr(env), vfp_get_fpsr(env)); - if (arm_feature(env, ARM_FEATURE_SVE)) { - int j, zcr_len = env->vfp.zcr_el[1] & 0xf; /* fix for system mode */ + if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { + int j, zcr_len = sve_zcr_len_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { bool eol; @@ -1196,25 +1201,23 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_tmp); @@ -2082,26 +2085,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, get_mem_index(s), MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (s->be_data == MO_LE) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!HAVE_CMPXCHG128) { + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; + } else if (s->be_data == MO_LE) { gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); - } - } else { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, 
cpu_exclusive_addr, cpu_exclusive_val, @@ -2171,14 +2175,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, } tcg_temp_free_i64(cmp); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - TCGv_i32 tcg_rs = tcg_const_i32(rs); - - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + if (HAVE_CMPXCHG128) { + TCGv_i32 tcg_rs = tcg_const_i32(rs); + if (s->be_data == MO_LE) { + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + } else { + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + } + tcg_temp_free_i32(tcg_rs); } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(tcg_rs); } else { TCGv_i64 d1 = tcg_temp_new_i64(); TCGv_i64 d2 = tcg_temp_new_i64(); @@ -2318,7 +2326,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2340,7 +2348,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2351,7 +2359,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) case 0xb: /* CASL */ case 0xe: /* CASA */ case 0xf: /* CASAL */ - if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { gen_compare_and_swap(s, rs, rt, rn, size); return; } @@ -2890,11 +2898,10 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int rs = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int o3_opc = extract32(insn, 12, 4); - int feature = ARM_FEATURE_V8_ATOMICS; TCGv_i64 tcg_rn, tcg_rs; AtomicThreeOpFn *fn; - if (is_vector) { + if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); return; } @@ -2930,10 +2937,6 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { - unallocated_encoding(s); - return; - } if (rn == 31) { gen_check_sp_alignment(s); @@ -3013,10 +3016,11 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_store = !extract32(insn, 22, 1); bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; + TCGMemOp endian = s->be_data; - int ebytes = 1 << size; - int elements = (is_q ? 128 : 64) / (8 << size); + int ebytes; /* bytes per element */ + int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ int r; @@ -3075,39 +3079,55 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (selem == 1 && endian == MO_LE) { + size = 3; + } + ebytes = 1 << size; + elements = (is_q ? 
16 : 8) / ebytes; + tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { - int tt = (rt + r) % 32; int xs; for (xs = 0; xs < selem; xs++) { + int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, tcg_addr, size); + do_vec_st(s, tt, e, tcg_addr, size, endian); } else { - do_vec_ld(s, tt, e, tcg_addr, size); - - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. We optimize by noticing that we only - * need to do this the first time we touch a register. - */ - if (e == 0 && (r == 0 || xs == selem - 1)) { - clear_vec_high(s, is_q, tt); - } + do_vec_ld(s, tt, e, tcg_addr, size, endian); } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); - tt = (tt + 1) % 32; + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); } } } + if (!is_store) { + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). + * For quad operations, we might still need to zero the + * high bits of SVE. + */ + for (r = 0; r < rpt * selem; r++) { + int tt = (rt + r) % 32; + clear_vec_high(s, is_q, tt); + } + } + if (is_postidx) { int rm = extract32(insn, 16, 5); if (rm == 31) { @@ -3116,6 +3136,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -3158,7 +3179,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) bool replicate = false; int index = is_q << 3 | S << 2 | size; int ebytes, xs; - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; switch (scale) { case 3: @@ -3211,49 +3232,28 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ - uint64_t mulconst; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), s->be_data + scale); - switch (scale) { - case 0: - mulconst = 0x0101010101010101ULL; - break; - case 1: - mulconst = 0x0001000100010001ULL; - break; - case 2: - mulconst = 0x0000000100000001ULL; - break; - case 3: - mulconst = 0; - break; - default: - g_assert_not_reached(); - } - if (mulconst) { - tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); - } - write_vec_element(s, tcg_tmp, rt, 0, MO_64); - if (is_q) { - write_vec_element(s, tcg_tmp, rt, 1, MO_64); - } + tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), + (is_q + 1) * 8, vec_full_reg_size(s), + tcg_tmp); tcg_temp_free_i64(tcg_tmp); - clear_vec_high(s, is_q, rt); } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, tcg_addr, scale); + do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data); } else { - do_vec_st(s, rt, index, tcg_addr, scale); + do_vec_st(s, rt, index, tcg_addr, scale, s->be_data); } } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); rt = (rt + 1) % 32; } @@ 
-3265,6 +3265,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -4564,7 +4565,7 @@ static void handle_crc32(DisasContext *s, TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!arm_dc_feature(s, ARM_FEATURE_CRC) + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || (sf == 0 && sz == 3)) { unallocated_encoding(s); @@ -4806,7 +4807,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4857,7 +4858,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4923,7 +4924,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5256,7 +5257,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) handle_fp_1src_double(s, opcode, rd, rn); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5471,7 +5472,7 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) handle_fp_2src_double(s, opcode, rd, rn, rm); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5629,7 +5630,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn) handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5699,7 +5700,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5924,7 +5925,7 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6054,7 +6055,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) break; case 0x6: /* 16-bit float, 32-bit int */ case 0xe: /* 16-bit float, 64-bit int */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6081,7 +6082,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6518,7 +6519,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) */ is_min = extract32(size, 1, 1); is_fp = true; - if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!is_u && dc_isar_feature(aa64_fp16, s)) { size = 1; } else if (!is_u || !is_q || extract32(size, 0, 1)) { unallocated_encoding(s); @@ -6914,7 +6915,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { /* Check for FMOV (vector, immediate) - half-precision */ - if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) { + if 
(!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { unallocated_encoding(s); return; } @@ -7081,7 +7082,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ if (!u) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } else { @@ -7726,7 +7727,7 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -7771,7 +7772,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 0x2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -8036,28 +8037,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* CMTST : test is "if (X & Y != 0)". */ -static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_and_i32(d, a, b); - tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i32(d, d); -} - -static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_and_i64(d, a, b); - tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i64(d, d); -} - -static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_and_vec(vece, d, a, b); - tcg_gen_dupi_vec(vece, a, 0); - tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); -} - static void handle_3same_64(DisasContext *s, int opcode, bool u, TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) { @@ -8535,7 +8514,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, return; } - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); } @@ -8608,7 +8587,7 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, bool u = extract32(insn, 29, 1); TCGv_i32 ele1, ele2, ele3; TCGv_i64 res; - int feature; + bool feature; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -8617,13 +8596,13 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -9397,191 +9376,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } } -static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_sari_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_sari_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_usra16_i64(TCGv_i64 
d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_shri_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); -} - -static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); -} - -static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (2ull << ((8 << vece) - 1)) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh)); - tcg_gen_shri_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_64 }, - }; - static const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64, }, - }; - static const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16 }, - { .fni4 = 
gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64 }, - }; - int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; @@ -9677,85 +9475,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, clear_vec_high(s, is_q, rd); } -static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); -} - -static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); -} - -static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (1ull << sh) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask); - tcg_gen_shli_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SHL/SLI - Vector shift left */ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i shi_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = immhb - (8 << size); @@ -9775,7 +9498,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]); + gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10352,7 +10075,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } if (size == 3) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa64_pmull, s)) { unallocated_encoding(s); return; } @@ -10397,70 +10120,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rm); - tcg_gen_and_i64(rn, rn, rd); 
- tcg_gen_xor_i64(rd, rm, rn); -} - -static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_and_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_andc_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rm); - tcg_gen_and_vec(vece, rn, rn, rd); - tcg_gen_xor_vec(vece, rd, rm, rn); -} - -static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_and_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - -static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_andc_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - /* Logic op (opcode == 3) subgroup of C3.6.16. */ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { - static const GVecGen3 bsl_op = { - .fni8 = gen_bsl_i64, - .fniv = gen_bsl_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bit_op = { - .fni8 = gen_bit_i64, - .fniv = gen_bit_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bif_op = { - .fni8 = gen_bif_i64, - .fniv = gen_bif_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); int rm = extract32(insn, 16, 5); @@ -10732,131 +10394,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) } } -static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_add_u8(d, d, a); -} - -static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_add_u16(d, d, a); -} - -static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_add_i32(d, d, a); -} - -static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_add_i64(d, d, a); -} - -static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_sub_u8(d, d, a); -} - -static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_sub_u16(d, d, a); -} - -static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_sub_i32(d, d, a); -} - -static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_sub_i64(d, d, a); -} - -static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, d, d, a); -} - /* Integer op subgroup of C3.6.16. 
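
The expander tables deleted here (bsl/bit/bif, mla/mls, cmtst and the shift-insert ops) are not simply dropped: at least sli_op is still referenced by name further up, so these tables have presumably been moved to code shared with the AArch32 translator. The BSL expander itself is the classic two-XOR bit select, rd = rm ^ ((rn ^ rm) & rd), which picks rn where the old rd bit is 1 and rm where it is 0; a quick check of that identity:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t rd = 0xff00ff00ff00ff00ull;    /* select mask (old rd value)  */
        uint64_t rn = 0x1111111111111111ull;
        uint64_t rm = 0x2222222222222222ull;

        uint64_t bsl = rm ^ ((rn ^ rm) & rd);   /* two XORs and one AND        */
        uint64_t ref = (rn & rd) | (rm & ~rd);  /* "rn where rd set, else rm"  */

        assert(bsl == ref);
        return 0;
    }
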
*/ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) { - static const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .vece = MO_64 }, - }; - static const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - static const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - int is_q = extract32(insn, 30, 1); int u = extract32(insn, 29, 1); int size = extract32(insn, 22, 2); @@ -11216,7 +10756,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) TCGv_ptr fpst; bool pairwise = false; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -11404,7 +10944,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) int size = extract32(insn, 22, 2); bool u = extract32(insn, 29, 1); bool is_q = extract32(insn, 30, 1); - int feature, rot; + bool feature; + int rot; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -11413,7 +10954,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; case 0x02: /* SDOT (vector) */ case 0x12: /* UDOT (vector) */ @@ -11421,7 +10962,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_DOTPROD; + feature = dc_isar_feature(aa64_dp, s); break; case 0x18: /* FCMLA, #0 */ case 0x19: /* FCMLA, #90 */ @@ -11430,18 +10971,18 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ if (size == 0 - || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) + || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_FCMA; + feature = dc_isar_feature(aa64_fcma, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -12310,7 +11851,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) bool need_fpst = true; int rmode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if 
(!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -12655,14 +12196,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa64_rdm, s)) { unallocated_encoding(s); return; } break; case 0x0e: /* SDOT */ case 0x1e: /* UDOT */ - if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) { unallocated_encoding(s); return; } @@ -12671,7 +12212,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa64_fcma, s)) { unallocated_encoding(s); return; } @@ -12727,7 +12268,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; } - if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -13198,8 +12739,7 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) TCGv_i32 tcg_decrypt; CryptoThreeOpIntFn *genfn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || size != 0) { + if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); return; } @@ -13256,7 +12796,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); CryptoThreeOpFn *genfn; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - int feature = ARM_FEATURE_V8_SHA256; + bool feature; if (size != 0) { unallocated_encoding(s); @@ -13269,23 +12809,26 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) case 2: /* SHA1M */ case 3: /* SHA1SU0 */ genfn = NULL; - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ genfn = gen_helper_crypto_sha256h; + feature = dc_isar_feature(aa64_sha256, s); break; case 5: /* SHA256H2 */ genfn = gen_helper_crypto_sha256h2; + feature = dc_isar_feature(aa64_sha256, s); break; case 6: /* SHA256SU1 */ genfn = gen_helper_crypto_sha256su1; + feature = dc_isar_feature(aa64_sha256, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13326,7 +12869,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); CryptoTwoOpFn *genfn; - int feature; + bool feature; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { @@ -13336,15 +12879,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1H */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1h; break; case 1: /* SHA1SU1 */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1su1; break; case 2: /* SHA256SU0 */ - feature = ARM_FEATURE_V8_SHA256; + feature = dc_isar_feature(aa64_sha256, s); genfn = gen_helper_crypto_sha256su0; break; default: @@ -13352,7 +12895,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13383,40 +12926,40 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = 
extract32(insn, 0, 5); - int feature; + bool feature; CryptoThreeOpFn *genfn; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); genfn = NULL; break; } } else { switch (opcode) { case 0: /* SM3PARTW1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4ekey; break; default: @@ -13425,7 +12968,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) } } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13484,16 +13027,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - int feature; + bool feature; CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4e; break; default: @@ -13501,7 +13044,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13532,22 +13075,22 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) int ra = extract32(insn, 10, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int feature; + bool feature; switch (op0) { case 0: /* EOR3 */ case 1: /* BCAX */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); break; case 2: /* SM3SS1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13634,7 +13177,7 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; int pass; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) { + if (!dc_isar_feature(aa64_sha3, s)) { unallocated_encoding(s); return; } @@ -13680,7 +13223,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; TCGv_i32 tcg_imm2, tcg_opcode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) { + if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); return; } @@ -13788,7 +13331,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); break; case 0x2: - if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) { + if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) { unallocated_encoding(s); } break; @@ -13829,6 +13372,7 @@ static void 
aarch64_tr_init_disas_context(DisasContextBase *dcbase, ARMCPU *arm_cpu = arm_env_get_cpu(env); int bound; + dc->isar = &arm_cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -13892,7 +13436,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) { - tcg_clear_temp_count(); } static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 667879564f..fe7aebdc19 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4600,62 +4600,97 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; +static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) +{ + return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); +} + static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - gen_helper_gvec_mem *fn) + int dtype, gen_helper_gvec_mem *fn) { unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; - TCGv_i32 desc; + TCGv_i32 t_desc; + int desc; /* For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = tcg_const_i32(simd_desc(vsz, vsz, zt)); + desc = sve_memopidx(s, dtype); + desc |= zt << MEMOPIDX_SHIFT; + desc = simd_desc(vsz, vsz, desc); + t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - fn(cpu_env, t_pg, addr, desc); + fn(cpu_env, t_pg, addr, t_desc); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); } static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[16][4] = { - { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r, - gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r }, - { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r, - gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r }, - { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r, - gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r }, + static gen_helper_gvec_mem * const fns[2][16][4] = { + /* Little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { 
gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* Big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }; - gen_helper_gvec_mem *fn = fns[dtype][nreg]; + gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; /* While there are holes in the table, they are not * accessible via the instruction encoding. */ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, fn); + do_mem_zpa(s, zt, pg, addr, dtype, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) @@ -4689,59 +4724,104 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) { - static gen_helper_gvec_mem * const fns[16] = { - gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_r, - gen_helper_sve_ldff1hh_r, - gen_helper_sve_ldff1hsu_r, - gen_helper_sve_ldff1hdu_r, - - gen_helper_sve_ldff1hds_r, - gen_helper_sve_ldff1hss_r, - gen_helper_sve_ldff1ss_r, - gen_helper_sve_ldff1sdu_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_r, + static gen_helper_gvec_mem * const fns[2][16] = { + /* Little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* Big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + 
gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + fns[s->be_data == MO_BE][a->dtype]); } return true; } static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) { - static gen_helper_gvec_mem * const fns[16] = { - gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_r, - gen_helper_sve_ldnf1hh_r, - gen_helper_sve_ldnf1hsu_r, - gen_helper_sve_ldnf1hdu_r, - - gen_helper_sve_ldnf1hds_r, - gen_helper_sve_ldnf1hss_r, - gen_helper_sve_ldnf1ss_r, - gen_helper_sve_ldnf1sdu_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_r, + static gen_helper_gvec_mem * const fns[2][16] = { + /* Little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, + + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, + + /* Big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, + + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r }, }; if (sve_access_check(s)) { @@ -4751,30 +4831,57 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + fns[s->be_data == MO_BE][a->dtype]); } return true; } static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) { - static gen_helper_gvec_mem * const fns[4] = { - gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r, - gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r, + static gen_helper_gvec_mem * const fns[2][4] = { + { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r, + gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r }, + { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r, + gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r }, }; unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; - TCGv_i32 desc; + TCGv_i32 t_desc; + int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = tcg_const_i32(simd_desc(16, 16, zt)); + desc = sve_memopidx(s, msz_dtype(msz)); + desc |= zt << MEMOPIDX_SHIFT; + desc = simd_desc(16, 16, desc); + t_desc = tcg_const_i32(desc); + + poff = pred_full_reg_offset(s, pg); + if (vsz > 16) { + /* + * Zero-extend the first 16 bits of the predicate into a temporary. 
+ * This avoids triggering an assert making sure we don't have bits + * set within a predicate beyond VQ, but we have lowered VQ to 1 + * for this load operation. + */ + TCGv_i64 tmp = tcg_temp_new_i64(); +#ifdef HOST_WORDS_BIGENDIAN + poff += 6; +#endif + tcg_gen_ld16u_i64(tmp, cpu_env, poff); + + poff = offsetof(CPUARMState, vfp.preg_tmp); + tcg_gen_st_i64(tmp, cpu_env, poff); + tcg_temp_free_i64(tmp); + } + t_pg = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(t_pg, cpu_env, poff); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - fns[msz](cpu_env, t_pg, addr, desc); + fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); /* Replicate that first quadword. */ if (vsz > 16) { @@ -4860,35 +4967,73 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[4][4] = { - { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r }, - { NULL, gen_helper_sve_st1hh_r, - gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r }, - { NULL, NULL, - gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r }, - { NULL, NULL, NULL, gen_helper_sve_st1dd_r }, + static gen_helper_gvec_mem * const fn_single[2][4][4] = { + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } }, }; - static gen_helper_gvec_mem * const fn_multiple[3][4] = { - { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r, - gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r }, - { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r, - gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r }, - { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r, - gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r }, + static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } }, }; gen_helper_gvec_mem *fn; + int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[msz][esz]; + fn = fn_single[be][msz][esz]; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[nreg - 1][msz]; + fn = fn_multiple[be][nreg - 1][msz]; } assert(fn != 
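/*
 * [Editorial aside -- not part of the patch.]  do_mem_zpa()/do_ldrq() now fold
 * the TCGMemOpIdx and the Zt register number (or the scale, for the
 * gather/scatter path below) into the data field of simd_desc().  A helper on
 * the other side would unpack it roughly as follows -- the field widths are
 * an assumption based on the MEMOPIDX_SHIFT encoding above, not a quote from
 * sve_helper.c:
 *
 *     TCGMemOpIdx oi = extract32(simd_data(desc), 0, MEMOPIDX_SHIFT);
 *     unsigned zt    = extract32(simd_data(desc), MEMOPIDX_SHIFT, 5);
 *     int mmu_idx    = get_mmuidx(oi);
 *
 * which lets a single helper recover the memop, mmu index and register number
 * from one 32-bit descriptor argument.
 */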
NULL); - do_mem_zpa(s, zt, pg, addr, fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn) @@ -4926,111 +5071,203 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn) *** SVE gather loads / scatter stores */ -static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale, - TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn) +static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, + int scale, TCGv_i64 scalar, int msz, + gen_helper_gvec_mem_scatter *fn) { unsigned vsz = vec_full_reg_size(s); - TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale)); TCGv_ptr t_zm = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); + TCGv_i32 t_desc; + int desc; + + desc = sve_memopidx(s, msz_dtype(msz)); + desc |= scale << MEMOPIDX_SHIFT; + desc = simd_desc(vsz, vsz, desc); + t_desc = tcg_const_i32(desc); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); - fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc); + fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc); tcg_temp_free_ptr(t_zt); tcg_temp_free_ptr(t_zm); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); } -/* Indexed by [ff][xs][u][msz]. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = { - { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_zsu, - gen_helper_sve_ldssu_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_zss, - gen_helper_sve_ldssu_zss, } } }, - - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_zsu, - gen_helper_sve_ldffssu_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_zss, - gen_helper_sve_ldffssu_zss, } } } +/* Indexed by [be][ff][xs][u][msz]. 
*/ +static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { + /* Little-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = { - { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_zsu, - gen_helper_sve_ldsds_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_zsu, - gen_helper_sve_ldsdu_zsu, - gen_helper_sve_ldddu_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_zss, - gen_helper_sve_ldsds_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_zss, - gen_helper_sve_ldsdu_zss, - gen_helper_sve_ldddu_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_zd, - gen_helper_sve_ldsds_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_zd, - gen_helper_sve_ldsdu_zd, - gen_helper_sve_ldddu_zd, } } }, - - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_zsu, - gen_helper_sve_ldffsds_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_zsu, - gen_helper_sve_ldffsdu_zsu, - gen_helper_sve_ldffddu_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_zss, - gen_helper_sve_ldffsds_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_zss, - gen_helper_sve_ldffsdu_zss, - gen_helper_sve_ldffddu_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_zd, - gen_helper_sve_ldffsds_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_zd, - gen_helper_sve_ldffsdu_zd, - gen_helper_sve_ldffddu_zd, } } } +static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { + /* Little-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + 
gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; if (!sve_access_check(s)) { return true; @@ -5038,22 +5275,23 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), fn); + cpu_reg_sp(s, a->rn), a->msz, fn); return true; } static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5065,10 +5303,10 @@ static bool 
trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = gather_load_fn32[a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5077,40 +5315,63 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); tcg_temp_free_i64(imm); return true; } -/* Indexed by [xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = { - { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_zsu, - gen_helper_sve_stss_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_zss, - gen_helper_sve_stss_zss, }, +/* Indexed by [be][xs][msz]. */ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { + /* Little-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + /* Big-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = { - { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_zsu, - gen_helper_sve_stsd_zsu, - gen_helper_sve_stdd_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_zss, - gen_helper_sve_stsd_zss, - gen_helper_sve_stdd_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_zd, - gen_helper_sve_stsd_zd, - gen_helper_sve_stdd_zd, }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { + /* Little-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + /* Big-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn; + int be = s->be_data == MO_BE; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5120,22 +5381,23 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[a->xs][a->msz]; + fn = scatter_store_fn32[be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[a->xs][a->msz]; + fn = scatter_store_fn64[be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), fn); + cpu_reg_sp(s, a->rn), a->msz, fn); return true; } static bool 
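/*
 * [Editorial aside -- not part of the patch.]  Every load/store helper table
 * in this file gains a leading [be] index, selected at translate time from
 * the guest data endianness, e.g.:
 *
 *     int be = s->be_data == MO_BE;    // 0: little-endian, 1: big-endian
 *     fn = scatter_store_fn64[be][a->xs][a->msz];
 *
 * The byte-sized entries (msz == 0, such as gen_helper_sve_stbd_*) are the
 * same in both halves of each table, since single-byte accesses have no
 * endianness.
 */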
trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5147,10 +5409,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[0][a->msz]; + fn = scatter_store_fn32[be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[2][a->msz]; + fn = scatter_store_fn64[be][2][a->msz]; break; } assert(fn != NULL); @@ -5159,7 +5421,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); tcg_temp_free_i64(imm); return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index c6a5d2ac44..7c4675ffd8 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -42,7 +42,7 @@ #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) /* currently all emulated v5 cores are also v5TE, so don't bother */ #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) -#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE) +#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s) #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) @@ -72,7 +72,7 @@ static TCGv_i64 cpu_F0d, cpu_F1d; #include "exec/gen-icount.h" -static const char *regnames[] = +static const char * const regnames[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -239,6 +239,23 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) tcg_temp_free_i32(var); } +/* + * Variant of store_reg which applies v8M stack-limit checks before updating + * SP. If the check fails this will result in an exception being taken. + * We disable the stack checks for CONFIG_USER_ONLY because we have + * no idea what the stack limits should be in that case. + * If stack checking is not being done this just acts like store_reg(). + */ +static void store_sp_checked(DisasContext *s, TCGv_i32 var) +{ +#ifndef CONFIG_USER_ONLY + if (s->v8m_stackcheck) { + gen_helper_v8m_stackcheck(cpu_env, var); + } +#endif + store_reg(s, 13, var); +} + /* Value extensions. */ #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var) #define gen_uxth(var) tcg_gen_ext16u_i32(var, var) @@ -1568,6 +1585,25 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, TCGMemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + static TCGv_i32 neon_load_reg(int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -1575,12 +1611,94 @@ static TCGv_i32 neon_load_reg(int reg, int pass) return tmp; } +static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(var, cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_store_reg(int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(var); } +static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(var, cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static inline void neon_load_reg64(TCGv_i64 var, int reg) { tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); @@ -2957,19 +3075,6 @@ static void gen_vfp_msr(TCGv_i32 tmp) tcg_temp_free_i32(tmp); } -static void gen_neon_dup_u8(TCGv_i32 var, int shift) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_ext8u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 8); - tcg_gen_or_i32(var, var, tmp); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static void gen_neon_dup_low16(TCGv_i32 var) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -2988,28 +3093,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_temp_free_i32(tmp); } -static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) -{ - /* Load a single Neon element and replicate into a 32 bit TCG reg */ - TCGv_i32 tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. 
*/ - abort(); - } - return tmp; -} - static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, uint32_t dp) { @@ -3415,17 +3498,10 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) tmp = load_reg(s, rd); if (insn & (1 << 23)) { /* VDUP */ - if (size == 0) { - gen_neon_dup_u8(tmp, 0); - } else if (size == 1) { - gen_neon_dup_low16(tmp); - } - for (n = 0; n <= pass * 2; n++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rn, n, tmp2); - } - neon_store_reg(rn, n, tmp); + int vec_size = pass ? 16 : 8; + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0), + vec_size, vec_size, tmp); + tcg_temp_free_i32(tmp); } else { /* VMOV */ switch (size) { @@ -4212,6 +4288,18 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) if (insn & (1 << 24)) /* pre-decrement */ tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2)); + if (s->v8m_stackcheck && rn == 13 && w) { + /* + * Here 'addr' is the lowest address we will store to, + * and is either the old SP (if post-increment) or + * the new SP (if pre-decrement). For post-increment + * where the old value is below the limit and the new + * value is above, it is UNKNOWN whether the limit check + * triggers; we choose to trigger. + */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + if (dp) offset = 8; else @@ -4878,17 +4966,17 @@ static struct { int nregs; int interleave; int spacing; -} neon_ls_element_type[11] = { - {4, 4, 1}, - {4, 4, 2}, +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, {4, 1, 1}, - {4, 2, 1}, - {3, 3, 1}, - {3, 3, 2}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, {3, 1, 1}, {1, 1, 1}, - {2, 2, 1}, - {2, 2, 2}, + {1, 2, 1}, + {1, 2, 2}, {2, 1, 1} }; @@ -4904,10 +4992,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) int stride; int size; int reg; - int pass; int load; - int shift; int n; + int vec_size; + int mmu_idx; + TCGMemOp endian; TCGv_i32 addr; TCGv_i32 tmp; TCGv_i32 tmp2; @@ -4919,7 +5008,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -4929,6 +5018,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) rn = (insn >> 16) & 0xf; rm = insn & 0xf; load = (insn & (1 << 21)) != 0; + endian = s->be_data; + mmu_idx = get_mem_index(s); if ((insn & (1 << 23)) == 0) { /* Load store all elements. */ op = (insn >> 8) & 0xf; @@ -4953,104 +5044,44 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) + if (size == 3 && (interleave | spacing) != 1) { return 1; + } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(1 << size); load_reg_var(s, addr, rn); - stride = (1 << size) * interleave; for (reg = 0; reg < nregs; reg++) { - if (interleave > 2 || (interleave == 2 && nregs == 2)) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, (1 << size) * reg); - } else if (interleave == 2 && nregs == 4 && reg == 2) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, 1 << size); - } - if (size == 3) { - tmp64 = tcg_temp_new_i64(); - if (load) { - gen_aa32_ld64(s, tmp64, addr, get_mem_index(s)); - neon_store_reg64(tmp64, rd); - } else { - neon_load_reg64(tmp64, rd); - gen_aa32_st64(s, tmp64, addr, get_mem_index(s)); - } - tcg_temp_free_i64(tmp64); - tcg_gen_addi_i32(addr, addr, stride); - } else { - for (pass = 0; pass < 2; pass++) { - if (size == 2) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - } - tcg_gen_addi_i32(addr, addr, stride); - } else if (size == 1) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tmp2 = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - gen_aa32_st16(s, tmp2, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_addi_i32(addr, addr, stride); - } - } else /* size == 0 */ { - if (load) { - tmp2 = NULL; - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n == 0) { - tmp2 = tmp; - } else { - tcg_gen_shli_i32(tmp, tmp, n * 8); - tcg_gen_or_i32(tmp2, tmp2, tmp); - tcg_temp_free_i32(tmp); - } - } - neon_store_reg(rd, pass, tmp2); - } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - } - tcg_temp_free_i32(tmp2); - } + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = rd + reg + spacing * xs; + + if (load) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tt, n, size, tmp64); + } else { + neon_load_element64(tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); } + tcg_gen_add_i32(addr, addr, tmp2); } } - rd += spacing; } tcg_temp_free_i32(addr); - stride = nregs * 8; + tcg_temp_free_i32(tmp2); + tcg_temp_free_i64(tmp64); + stride = nregs * interleave * 8; } else { size = (insn >> 10) & 3; if (size == 3) { @@ -5077,45 +5108,50 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); - if (nregs == 1) { - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ - tmp = gen_load_and_replicate(s, addr, 
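/*
 * [Editorial aside -- not part of the patch.]  Example of the "promote
 * consecutive little-endian elements" case handled above: VLD1.16 {d0}, [r0]
 * on a little-endian guest has interleave == 1, so size is raised to 3 and
 * the element loop degenerates to one 64-bit access per D register, roughly:
 *
 *     gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, MO_LE | MO_Q);
 *     neon_store_element64(tt, 0, MO_64, tmp64);
 *     tcg_gen_add_i32(addr, addr, tmp2);   // tmp2 == 1 << size == 8
 *
 * instead of four separate 16-bit loads (the mnemonic and operands are
 * illustrative only).
 */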
size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - if (insn & (1 << 5)) { - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); - } - tcg_temp_free_i32(tmp); - } else { - /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ - stride = (insn & (1 << 5)) ? 2 : 1; - for (reg = 0; reg < nregs; reg++) { - tmp = gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, 1 << size); - rd += stride; + + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. + * VLD2/3/4 to all lanes: bit 5 indicates register stride. + */ + stride = (insn & (1 << 5)) ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((rd & 1) && vec_size == 16) { + /* We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), + neon_reg_offset(rd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + vec_size, vec_size, tmp); } + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ int idx = (insn >> 4) & 0xf; - pass = (insn >> 7) & 1; + int reg_idx; switch (size) { case 0: - shift = ((insn >> 5) & 3) * 8; + reg_idx = (insn >> 5) & 7; stride = 1; break; case 1: - shift = ((insn >> 6) & 1) * 16; + reg_idx = (insn >> 6) & 3; stride = (insn & (1 << 5)) ? 2 : 1; break; case 2: - shift = 0; + reg_idx = (insn >> 7) & 1; stride = (insn & (1 << 6)) ? 2 : 1; break; default: @@ -5155,52 +5191,24 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ return 1; } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - if (size != 2) { - tmp2 = neon_load_reg(rd, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, - shift, size ? 16 : 8); - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + neon_store_element(rd, reg_idx, size, tmp); } else { /* Store */ - tmp = neon_load_reg(rd, pass); - if (shift) - tcg_gen_shri_i32(tmp, tmp, shift); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - } - tcg_temp_free_i32(tmp); + neon_load_element(tmp, rd, reg_idx, size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); } rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); stride = nregs * (1 << size); } } @@ -5221,14 +5229,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) return 0; } -/* Bitwise select. dest = c ? 
t : f. Clobbers T and F. */ -static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c) -{ - tcg_gen_and_i32(t, t, c); - tcg_gen_andc_i32(f, f, c); - tcg_gen_or_i32(dest, t, f); -} - static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) { switch (size) { @@ -5435,7 +5435,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VABA 15 #define NEON_3R_VADD_VSUB 16 #define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VML 18 /* VMLA, VMLS */ #define NEON_3R_VMUL 19 #define NEON_3R_VPMAX 20 #define NEON_3R_VPMIN 21 @@ -5660,7 +5660,7 @@ static const uint8_t neon_2rm_sizes[] = { static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm) { - if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (dc_isar_feature(aa32_rdm, s)) { int opr_sz = (1 + q) * 8; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), vfp_reg_offset(1, rn), @@ -5671,6 +5671,483 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, return 1; } +/* + * Expanders for VBitOps_VBIF, VBIT, VBSL. + */ +static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rm); + tcg_gen_and_i64(rn, rn, rd); + tcg_gen_xor_i64(rd, rm, rn); +} + +static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_and_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_andc_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rm); + tcg_gen_and_vec(vece, rn, rn, rd); + tcg_gen_xor_vec(vece, rd, rm, rn); +} + +static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_and_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_andc_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +const GVecGen3 bsl_op = { + .fni8 = gen_bsl_i64, + .fniv = gen_bsl_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bit_op = { + .fni8 = gen_bit_i64, + .fniv = gen_bit_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bif_op = { + .fni8 = gen_bif_i64, + .fniv = gen_bif_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i ssra_op[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = 
gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_64 }, +}; + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i usra_op[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64, }, +}; + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sri_op[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64 }, +}; + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sli_op[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_64 }, +}; + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. 
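A side note on the VBSL/VBIT/VBIF expanders earlier in this hunk: all three use the same three-operation XOR rewrite of bitwise select, which needs no NOT and no extra temporary. A small stand-alone sketch (not part of the patch) checking the XOR form against the plain mask/select definition:

#include <assert.h>
#include <stdint.h>

/* Plain definition of bitwise select: take bits of n where the mask is set,
 * bits of m where it is clear (this is what the old gen_neon_bsl computed). */
static uint64_t bsl_ref(uint64_t mask, uint64_t n, uint64_t m)
{
    return (n & mask) | (m & ~mask);
}

/* XOR form used by gen_bsl_i64()/gen_bsl_vec() above:
 * d = m ^ ((n ^ m) & mask). */
static uint64_t bsl_xor(uint64_t mask, uint64_t n, uint64_t m)
{
    return m ^ ((n ^ m) & mask);
}

int main(void)
{
    uint64_t v[] = { 0, ~0ull, 0x0123456789abcdefull, 0xf0f0cccc5555aaaaull };
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            for (int k = 0; k < 4; k++) {
                assert(bsl_ref(v[i], v[j], v[k]) == bsl_xor(v[i], v[j], v[k]));
            }
        }
    }
    return 0;
}

The same shape also gives VBIT and VBIF simply by swapping which operand plays the role of the mask, which is why the three GVecGen3 tables differ only in their fni8/fniv callbacks.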
+ */ +const GVecGen3 mla_op[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +const GVecGen3 mls_op[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +/* CMTST : test is "if (X & Y != 0)". */ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_and_i32(d, a, b); + tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i32(d, d); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_and_i64(d, a, b); + tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_and_vec(vece, d, a, b); + tcg_gen_dupi_vec(vece, a, 0); + tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); +} + +const GVecGen3 cmtst_op[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5680,14 +6157,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { int op; int q; - int rd, rn, rm; + int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; int shift; int pass; int count; int pairwise; int u; - uint32_t imm, mask; + int vec_size; + uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; TCGv_ptr ptr1, ptr2, ptr3; TCGv_i64 tmp64; @@ -5698,7 +6176,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -5710,6 +6188,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); VFP_DREG_M(rm, insn); size = (insn >> 20) & 3; + vec_size = q ? 16 : 8; + rd_ofs = neon_reg_offset(rd, 0); + rn_ofs = neon_reg_offset(rn, 0); + rm_ofs = neon_reg_offset(rm, 0); + if ((insn & (1 << 23)) == 0) { /* Three register same length. 
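The CMTST table just above builds each lane's result from a setcond plus a negate; a scalar model of one 32-bit lane (not part of the patch), only to make the all-ones/all-zeros convention explicit:

#include <assert.h>
#include <stdint.h>

/* One 32-bit lane of CMTST as expanded above: -1 if the inputs share any
 * set bit, 0 otherwise (setcond gives 0/1, negation turns 1 into -1). */
static int32_t cmtst32(uint32_t a, uint32_t b)
{
    return -(int32_t)((a & b) != 0);
}

int main(void)
{
    assert(cmtst32(0x8000, 0x8001) == -1);
    assert(cmtst32(0x00f0, 0x0f00) == 0);
    return 0;
}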
*/ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); @@ -5734,7 +6217,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } if (!u) { /* SHA-1 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5744,7 +6227,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4); tcg_temp_free_i32(tmp4); } else { /* SHA-256 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) { + if (!dc_isar_feature(aa32_sha2, s) || size == 3) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5800,8 +6283,100 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) q, rd, rn, rm); } return 1; + + case NEON_3R_LOGIC: /* Logic ops. */ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 1: /* VBIC */ + tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 2: + if (rn == rm) { + /* VMOV */ + tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); + } else { + /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + break; + case 3: /* VORN */ + tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 4: /* VEOR */ + tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 5: /* VBSL */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bsl_op); + break; + case 6: /* VBIT */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bit_op); + break; + case 7: /* VBIF */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bif_op); + break; + } + return 0; + + case NEON_3R_VADD_VSUB: + if (u) { + tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + + case NEON_3R_VMUL: /* VMUL */ + if (u) { + /* Polynomial case allows only P8 and is handled below. */ + if (size != 0) { + return 1; + } + } else { + tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + return 0; + } + break; + + case NEON_3R_VML: /* VMLA, VMLS */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + u ? &mls_op[size] : &mla_op[size]); + return 0; + + case NEON_3R_VTST_VCEQ: + if (u) { /* VCEQ */ + tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { /* VTST */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &cmtst_op[size]); + } + return 0; + + case NEON_3R_VCGT: + tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; + + case NEON_3R_VCGE: + tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; } - if (size == 3 && op != NEON_3R_LOGIC) { + + if (size == 3) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 
2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); @@ -5857,13 +6432,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_sub_i64(CPU_V001); - } else { - tcg_gen_add_i64(CPU_V001); - } - break; default: abort(); } @@ -5913,12 +6481,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } break; - case NEON_3R_VMUL: - if (u && (size != 0)) { - /* UNDEF on invalid size for polynomial subcase */ - return 1; - } - break; case NEON_3R_VFM_VQRDMLSH: if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { return 1; @@ -5959,52 +6521,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - case 1: /* BIC */ - tcg_gen_andc_i32(tmp, tmp, tmp2); - break; - case 2: /* VORR */ - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - case 3: /* VORN */ - tcg_gen_orc_i32(tmp, tmp, tmp2); - break; - case 4: /* VEOR */ - tcg_gen_xor_i32(tmp, tmp, tmp2); - break; - case 5: /* VBSL */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - break; - case 6: /* VBIT */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp3, tmp2); - tcg_temp_free_i32(tmp3); - break; - case 7: /* VBIF */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp3, tmp, tmp2); - tcg_temp_free_i32(tmp3); - break; - } - break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; case NEON_3R_VQSUB: GEN_NEON_INTEGER_OP_ENV(qsub); break; - case NEON_3R_VCGT: - GEN_NEON_INTEGER_OP(cgt); - break; - case NEON_3R_VCGE: - GEN_NEON_INTEGER_OP(cge); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; @@ -6032,61 +6554,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case NEON_3R_VADD_VSUB: - if (!u) { /* VADD */ - gen_neon_add(size, tmp, tmp2); - } else { /* VSUB */ - switch (size) { - case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VTST_VCEQ: - if (!u) { /* VTST */ - switch (size) { - case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } else { /* VCEQ */ - switch (size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; case NEON_3R_VMUL: - if (u) { /* polynomial */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - } else { /* Integer */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, 
tmp2); break; - default: abort(); - } - } + /* VMUL.P8; other cases already eliminated. */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); @@ -6268,8 +6738,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - /* To avoid excessive duplication of ops we implement shift - by immediate using the variable shift operations. */ if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ @@ -6281,43 +6749,99 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* Right shifts are encoded as N - shift, where N is the element size in bits. */ - if (op <= 4) + if (op <= 4) { shift = shift - (1 << (size + 3)); + } + + switch (op) { + case 0: /* VSHR */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, + MIN(shift, (8 << size) - 1), + vec_size, vec_size); + } else if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + return 0; + + case 1: /* VSRA */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + MIN(shift, (8 << size) - 1), + &ssra_op[size]); + } else if (shift >= 8 << size) { + /* rd += 0 */ + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &usra_op[size]); + } + return 0; + + case 4: /* VSRI */ + if (!u) { + return 1; + } + /* Right shift comes here negative. */ + shift = -shift; + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &sri_op[size]); + } + return 0; + + case 5: /* VSHL, VSLI */ + if (u) { /* VSLI */ + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, + vec_size, shift, &sli_op[size]); + } + } else { /* VSHL */ + /* Shifts larger than the element size are + * architecturally valid and results in zero. + */ + if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + } + return 0; + } + if (size == 3) { count = q + 1; } else { count = q ? 4: 2; } - switch (size) { - case 0: - imm = (uint8_t) shift; - imm |= imm << 8; - imm |= imm << 16; - break; - case 1: - imm = (uint16_t) shift; - imm |= imm << 16; - break; - case 2: - case 3: - imm = shift; - break; - default: - abort(); - } + + /* To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
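The dup_const(size, shift) call just below replaces the open-coded replication switch removed here. A behaviourally equivalent sketch (dup_const itself lives in the TCG headers; this only mirrors its effect, not its implementation):

#include <assert.h>
#include <stdint.h>

/* Replicate a constant across 64 bits at the given element size
 * (0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit lanes), mirroring the
 * removed switch (imm |= imm << 8; imm |= imm << 16; ...). */
static uint64_t dup_const_sketch(unsigned vece, uint64_t c)
{
    switch (vece) {
    case 0:  return (c & 0xff)        * 0x0101010101010101ull;
    case 1:  return (c & 0xffff)      * 0x0001000100010001ull;
    case 2:  return (c & 0xffffffffu) * 0x0000000100000001ull;
    default: return c;
    }
}

int main(void)
{
    assert(dup_const_sketch(0, 0x2a)       == 0x2a2a2a2a2a2a2a2aull);
    assert(dup_const_sketch(1, 0x1234)     == 0x1234123412341234ull);
    assert(dup_const_sketch(2, 0xdeadbeef) == 0xdeadbeefdeadbeefull);
    return 0;
}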
+ */ + imm = dup_const(size, shift); for (pass = 0; pass < count; pass++) { if (size == 3) { neon_load_reg64(cpu_V0, rm + pass); tcg_gen_movi_i64(cpu_V1, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ if (u) @@ -6325,10 +6849,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) else gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 6: /* VQSHLU */ gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); @@ -6342,26 +6862,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V0, cpu_V1); } break; + default: + g_assert_not_reached(); } - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - neon_load_reg64(cpu_V1, rd + pass); - uint64_t mask; - if (shift < -63 || shift > 63) { - mask = 0; - } else { - if (op == 4) { - mask = 0xffffffffffffffffull >> -shift; - } else { - mask = 0xffffffffffffffffull << shift; - } - } - tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ @@ -6370,23 +6877,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - GEN_NEON_INTEGER_OP(shl); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ GEN_NEON_INTEGER_OP(rshl); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - switch (size) { - case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: abort(); - } - break; case 6: /* VQSHLU */ switch (size) { case 0: @@ -6408,50 +6902,16 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 7: /* VQSHL */ GEN_NEON_INTEGER_OP_ENV(qshl); break; + default: + g_assert_not_reached(); } tcg_temp_free_i32(tmp2); - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. 
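On the VSHR/VSRA handling above: the immediate encoding allows a right shift equal to the element size, and the new code resolves that case before emitting any gvec op. A scalar model of the rule for one 8-bit lane (a sketch, assuming the usual arithmetic behaviour of signed `>>` on the host):

#include <assert.h>
#include <stdint.h>

/* Signed VSHR: a shift of esize or more yields all sign bits, which is the
 * same as clamping to esize - 1 (cf. MIN(shift, (8 << size) - 1) above). */
static int8_t sshr8(int8_t x, unsigned sh)
{
    return x >> (sh >= 8 ? 7 : sh);
}

/* Unsigned VSHR: a shift of esize or more yields zero. */
static uint8_t ushr8(uint8_t x, unsigned sh)
{
    return sh >= 8 ? 0 : x >> sh;
}

int main(void)
{
    assert(sshr8(-96, 8) == -1);      /* only sign bits survive */
    assert(sshr8(96, 8) == 0);
    assert(ushr8(0xa0, 8) == 0);
    assert(ushr8(0xa0, 3) == 0x14);
    return 0;
}

For VSRA the same rule means the unsigned out-of-range case degenerates to adding zero, which is why that branch in the patch is deliberately empty.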
*/ tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); tcg_temp_free_i32(tmp2); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - switch (size) { - case 0: - if (op == 4) - mask = 0xff >> -shift; - else - mask = (uint8_t)(0xff << shift); - mask |= mask << 8; - mask |= mask << 16; - break; - case 1: - if (op == 4) - mask = 0xffff >> -shift; - else - mask = (uint16_t)(0xffff << shift); - mask |= mask << 16; - break; - case 2: - if (shift < -31 || shift > 31) { - mask = 0; - } else { - if (op == 4) - mask = 0xffffffffu >> -shift; - else - mask = 0xffffffffu << shift; - } - break; - default: - abort(); - } - tmp2 = neon_load_reg(rd, pass); - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_andi_i32(tmp2, tmp2, ~mask); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } @@ -6600,7 +7060,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } } else { /* (insn & 0x00380080) == 0 */ - int invert; + int invert, reg_ofs, vec_size; + if (q && (rd & 1)) { return 1; } @@ -6640,8 +7101,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; case 14: imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) + if (invert) { imm = ~imm; + } break; case 15: if (invert) { @@ -6651,36 +7113,45 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; } - if (invert) + if (invert) { imm = ~imm; + } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op & 1 && op < 12) { - tmp = neon_load_reg(rd, pass); - if (invert) { - /* The immediate value has already been inverted, so - BIC becomes AND. */ - tcg_gen_andi_i32(tmp, tmp, imm); - } else { - tcg_gen_ori_i32(tmp, tmp, imm); - } + reg_ofs = neon_reg_offset(rd, 0); + vec_size = q ? 16 : 8; + + if (op & 1 && op < 12) { + if (invert) { + /* The immediate value has already been inverted, + * so BIC becomes AND. + */ + tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); } else { - /* VMOV, VMVN. */ - tmp = tcg_temp_new_i32(); - if (op == 14 && invert) { + tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); + } + } else { + /* VMOV, VMVN. 
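For the inverted op == 14 immediate form handled just below, each bit of the 8-bit immediate selects a whole 0xFF byte of the 64-bit result (the removed code did the same thing one 32-bit pass at a time). A scalar model of that expansion:

#include <assert.h>
#include <stdint.h>

/* Expand bit n of an 8-bit immediate into byte n of a 64-bit value,
 * as the per-pass loop below does. */
static uint64_t expand_imm8_to_bytes(uint8_t imm)
{
    uint64_t val = 0;

    for (int n = 0; n < 8; n++) {
        if (imm & (1u << n)) {
            val |= 0xffull << (n * 8);
        }
    }
    return val;
}

int main(void)
{
    assert(expand_imm8_to_bytes(0x81) == 0xff000000000000ffull);
    assert(expand_imm8_to_bytes(0x00) == 0);
    return 0;
}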
*/ + if (op == 14 && invert) { + TCGv_i64 t64 = tcg_temp_new_i64(); + + for (pass = 0; pass <= q; ++pass) { + uint64_t val = 0; int n; - uint32_t val; - val = 0; - for (n = 0; n < 4; n++) { - if (imm & (1 << (n + (pass & 1) * 4))) - val |= 0xff << (n * 8); + + for (n = 0; n < 8; n++) { + if (imm & (1 << (n + pass * 8))) { + val |= 0xffull << (n * 8); + } } - tcg_gen_movi_i32(tmp, val); - } else { - tcg_gen_movi_i32(tmp, imm); + tcg_gen_movi_i64(t64, val); + neon_store_reg64(t64, rd + pass); } + tcg_temp_free_i64(t64); + } else { + tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm); } - neon_store_reg(rd, pass, tmp); } } } else { /* (insn & 0x00800010 == 0x00800000) */ @@ -6739,7 +7210,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (op == 14 && size == 2) { TCGv_i64 tcg_rn, tcg_rm, tcg_rd; - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa32_pmull, s)) { return 1; } tcg_rn = tcg_temp_new_i64(); @@ -7056,7 +7527,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { NeonGenThreeOpEnvFn *fn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa32_rdm, s)) { return 1; } if (u && ((rd | rn) & 1)) { @@ -7330,8 +7801,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; } case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7352,8 +7822,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7370,10 +7839,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ if (q) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) { + if (!dc_isar_feature(aa32_sha2, s)) { return 1; } - } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7386,6 +7855,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(ptr1); tcg_temp_free_ptr(ptr2); break; + + case NEON_2RM_VMVN: + tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); + break; + case NEON_2RM_VNEG: + tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size); + break; + default: elementwise: for (pass = 0; pass < (q ? 
4 : 2); pass++) { @@ -7426,9 +7903,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case NEON_2RM_VMVN: - tcg_gen_not_i32(tmp, tmp); - break; case NEON_2RM_VQABS: switch (size) { case 0: @@ -7501,11 +7975,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) default: abort(); } break; - case NEON_2RM_VNEG: - tmp2 = tcg_const_i32(0); - gen_neon_rsb(size, tmp, tmp2); - tcg_temp_free_i32(tmp2); - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); @@ -7728,28 +8197,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + int element; + TCGMemOp size; + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { return 1; } - if (insn & (1 << 19)) { - tmp = neon_load_reg(rm, 1); - } else { - tmp = neon_load_reg(rm, 0); - } if (insn & (1 << 16)) { - gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); + size = MO_8; + element = (insn >> 17) & 7; } else if (insn & (1 << 17)) { - if ((insn >> 18) & 1) - gen_neon_dup_high16(tmp); - else - gen_neon_dup_low16(tmp); - } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, pass, tmp2); + size = MO_16; + element = (insn >> 18) & 3; + } else { + size = MO_32; + element = (insn >> 19) & 1; } - tcg_temp_free_i32(tmp); + tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), + neon_element_offset(rm, element, size), + q ? 16 : 8, q ? 16 : 8); } else { return 1; } @@ -7784,8 +8250,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 23, 2); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; @@ -7793,15 +8259,15 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 24, 1); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; } else if ((insn & 0xfeb00f00) == 0xfc200d00) { /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ bool u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? 
gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; @@ -7811,7 +8277,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -7863,11 +8329,11 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) int size = extract32(insn, 23, 1); int index; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa32_vcma, s)) { return 1; } if (size == 0) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { return 1; } /* For fp16, rm is just Vm, and index is M. */ @@ -7884,7 +8350,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) } else if ((insn & 0xffb00f00) == 0xfe200d00) { /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ int u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; @@ -7897,7 +8363,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -8860,8 +9326,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. * Bits 8, 10 and 11 should be zero. */ - if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 || - (c & 0xd) != 0) { + if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) { goto illegal_op; } @@ -9729,7 +10194,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) case 1: case 3: /* SDIV, UDIV */ - if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) { + if (!dc_isar_feature(arm_div, s)) { goto illegal_op; } if (((insn >> 5) & 7) || (rd != 15)) { @@ -10261,6 +10726,8 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx * - load/store dual (pre-indexed) */ + bool wback = extract32(insn, 21, 1); + if (rn == 15) { if (insn & (1 << 21)) { /* UNPREDICTABLE */ @@ -10272,8 +10739,29 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) addr = load_reg(s, rn); } offset = (insn & 0xff) * 4; - if ((insn & (1 << 23)) == 0) + if ((insn & (1 << 23)) == 0) { offset = -offset; + } + + if (s->v8m_stackcheck && rn == 13 && wback) { + /* + * Here 'addr' is the current SP; if offset is +ve we're + * moving SP up, else down. It is UNKNOWN whether the limit + * check triggers when SP starts below the limit and ends + * up above it; check whichever of the current and final + * SP is lower, so QEMU will trigger in that situation. + */ + if ((int32_t)offset < 0) { + TCGv_i32 newsp = tcg_temp_new_i32(); + + tcg_gen_addi_i32(newsp, addr, offset); + gen_helper_v8m_stackcheck(cpu_env, newsp); + tcg_temp_free_i32(newsp); + } else { + gen_helper_v8m_stackcheck(cpu_env, addr); + } + } + if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, offset); offset = 0; @@ -10297,7 +10785,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) gen_aa32_st32(s, tmp, addr, get_mem_index(s)); tcg_temp_free_i32(tmp); } - if (insn & (1 << 21)) { + if (wback) { /* Base writeback. 
*/ tcg_gen_addi_i32(addr, addr, offset - 4); store_reg(s, rn, addr); @@ -10484,6 +10972,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } else { int i, loaded_base = 0; TCGv_i32 loaded_var; + bool wback = extract32(insn, 21, 1); /* Load/store multiple. */ addr = load_reg(s, rn); offset = 0; @@ -10491,10 +10980,26 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (insn & (1 << i)) offset += 4; } + if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, -offset); } + if (s->v8m_stackcheck && rn == 13 && wback) { + /* + * If the writeback is incrementing SP rather than + * decrementing it, and the initial SP is below the + * stack limit but the final written-back SP would + * be above, then then we must not perform any memory + * accesses, but it is IMPDEF whether we generate + * an exception. We choose to do so in this case. + * At this point 'addr' is the lowest address, so + * either the original SP (if incrementing) or our + * final SP (if decrementing), so that's what we check. + */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + loaded_var = NULL; for (i = 0; i < 16; i++) { if ((insn & (1 << i)) == 0) @@ -10522,7 +11027,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (loaded_base) { store_reg(s, rn, loaded_var); } - if (insn & (1 << 21)) { + if (wback) { /* Base register writeback. */ if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, -offset); @@ -10583,7 +11088,13 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) goto illegal_op; tcg_temp_free_i32(tmp2); - if (rd != 15) { + if (rd == 13 && + ((op == 2 && rn == 15) || + (op == 8 && rn == 13) || + (op == 13 && rn == 13))) { + /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */ + store_sp_checked(s, tmp); + } else if (rd != 15) { store_reg(s, rd, tmp); } else { tcg_temp_free_i32(tmp); @@ -10600,6 +11111,10 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((insn & 0x70) != 0) goto illegal_op; + /* + * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx: + * - MOV, MOVS (register-shifted register), flagsetting + */ op = (insn >> 21) & 3; logic_cc = (insn & (1 << 20)) != 0; gen_arm_shift_reg(tmp, op, tmp2, logic_cc); @@ -10706,7 +11221,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) case 0x28: case 0x29: case 0x2a: - if (!arm_dc_feature(s, ARM_FEATURE_CRC)) { + if (!dc_isar_feature(aa32_crc32, s)) { goto illegal_op; } break; @@ -10887,7 +11402,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((op & 0x50) == 0x10) { /* sdiv, udiv */ - if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) { + if (!dc_isar_feature(thumb_div, s)) { goto illegal_op; } if (op & 0x20) @@ -11267,8 +11782,15 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) gen_jmp(s, s->pc + offset); } } else { - /* Data processing immediate. */ + /* + * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx + * - Data-processing (modified immediate, plain binary immediate) + */ if (insn & (1 << 25)) { + /* + * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx + * - Data-processing (plain binary immediate) + */ if (insn & (1 << 24)) { if (insn & (1 << 20)) goto illegal_op; @@ -11364,6 +11886,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, imm); } + store_reg(s, rd, tmp); } else { /* Add/sub 12-bit immediate. 
*/ if (rn == 15) { @@ -11374,17 +11897,27 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) offset += imm; tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, offset); + store_reg(s, rd, tmp); } else { tmp = load_reg(s, rn); if (insn & (1 << 23)) tcg_gen_subi_i32(tmp, tmp, imm); else tcg_gen_addi_i32(tmp, tmp, imm); + if (rn == 13 && rd == 13) { + /* ADD SP, SP, imm or SUB SP, SP, imm */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } } } - store_reg(s, rd, tmp); } } else { + /* + * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx + * - Data-processing (modified immediate) + */ int shifter_out = 0; /* modified 12-bit immediate. */ shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12); @@ -11426,7 +11959,11 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) goto illegal_op; tcg_temp_free_i32(tmp2); rd = (insn >> 8) & 0xf; - if (rd != 15) { + if (rd == 13 && rn == 13 + && (op == 8 || op == 13)) { + /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */ + store_sp_checked(s, tmp); + } else if (rd != 15) { store_reg(s, rd, tmp); } else { tcg_temp_free_i32(tmp); @@ -11535,7 +12072,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) imm = -imm; /* Fall through. */ case 0xf: /* Pre-increment. */ - tcg_gen_addi_i32(addr, addr, imm); writeback = 1; break; default: @@ -11547,6 +12083,28 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) issinfo = writeback ? ISSInvalid : rs; + if (s->v8m_stackcheck && rn == 13 && writeback) { + /* + * Stackcheck. Here we know 'addr' is the current SP; + * if imm is +ve we're moving SP up, else down. It is + * UNKNOWN whether the limit check triggers when SP starts + * below the limit and ends up above it; we chose to do so. + */ + if ((int32_t)imm < 0) { + TCGv_i32 newsp = tcg_temp_new_i32(); + + tcg_gen_addi_i32(newsp, addr, imm); + gen_helper_v8m_stackcheck(cpu_env, newsp); + tcg_temp_free_i32(newsp); + } else { + gen_helper_v8m_stackcheck(cpu_env, addr); + } + } + + if (writeback && !postinc) { + tcg_gen_addi_i32(addr, addr, imm); + } + if (insn & (1 << 20)) { /* Load. 
*/ tmp = tcg_temp_new_i32(); @@ -11629,7 +12187,11 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) rd = insn & 7; op = (insn >> 11) & 3; if (op == 3) { - /* add/subtract */ + /* + * 0b0001_1xxx_xxxx_xxxx + * - Add, subtract (three low registers) + * - Add, subtract (two low registers and immediate) + */ rn = (insn >> 3) & 7; tmp = load_reg(s, rn); if (insn & (1 << 10)) { @@ -11666,7 +12228,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) } break; case 2: case 3: - /* arithmetic large immediate */ + /* + * 0b001x_xxxx_xxxx_xxxx + * - Add, subtract, compare, move (one low register and immediate) + */ op = (insn >> 11) & 3; rd = (insn >> 8) & 0x7; if (op == 0) { /* mov */ @@ -11732,7 +12297,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); tcg_gen_add_i32(tmp, tmp, tmp2); tcg_temp_free_i32(tmp2); - store_reg(s, rd, tmp); + if (rd == 13) { + /* ADD SP, SP, reg */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } break; case 1: /* cmp */ tmp = load_reg(s, rd); @@ -11743,7 +12313,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; case 2: /* mov/cpy */ tmp = load_reg(s, rm); - store_reg(s, rd, tmp); + if (rd == 13) { + /* MOV SP, reg */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } break; case 3: { @@ -11793,7 +12368,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; } - /* data processing register */ + /* + * 0b0100_00xx_xxxx_xxxx + * - Data-processing (two low registers) + */ rd = insn & 7; rm = (insn >> 3) & 7; op = (insn >> 6) & 0xf; @@ -12071,7 +12649,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; case 10: - /* add to high reg */ + /* + * 0b1010_xxxx_xxxx_xxxx + * - Add PC/SP (immediate) + */ rd = (insn >> 8) & 7; if (insn & (1 << 11)) { /* SP */ @@ -12091,13 +12672,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) op = (insn >> 8) & 0xf; switch (op) { case 0: - /* adjust stack pointer */ + /* + * 0b1011_0000_xxxx_xxxx + * - ADD (SP plus immediate) + * - SUB (SP minus immediate) + */ tmp = load_reg(s, 13); val = (insn & 0x7f) * 4; if (insn & (1 << 7)) val = -(int32_t)val; tcg_gen_addi_i32(tmp, tmp, val); - store_reg(s, 13, tmp); + store_sp_checked(s, tmp); break; case 2: /* sign/zero extend. */ @@ -12114,7 +12699,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) store_reg(s, rd, tmp); break; case 4: case 5: case 0xc: case 0xd: - /* push/pop */ + /* + * 0b1011_x10x_xxxx_xxxx + * - push/pop + */ addr = load_reg(s, 13); if (insn & (1 << 8)) offset = 4; @@ -12127,6 +12715,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) if ((insn & (1 << 11)) == 0) { tcg_gen_addi_i32(addr, addr, -offset); } + + if (s->v8m_stackcheck) { + /* + * Here 'addr' is the lower of "old SP" and "new SP"; + * if this is a pop that starts below the limit and ends + * above it, it is UNKNOWN whether the limit check triggers; + * we choose to trigger. 
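The stack-limit hunks above (load/store writeback, load/store multiple, push/pop) all apply the same policy: run the check on whichever of the old and new SP is lower, so an access that starts below the limit and would finish above it still faults. A stand-alone sketch of just that decision, where splim stands in for the banked MSPLIM/PSPLIM value; gen_helper_v8m_stackcheck itself is not reproduced here:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Would the v8M stack-limit check fire for an SP adjustment of 'offset'
 * bytes?  The lower of the old and new SP is compared against the limit. */
static bool v8m_sp_check_fires(uint32_t sp, int32_t offset, uint32_t splim)
{
    uint32_t newsp = sp + (uint32_t)offset;
    uint32_t lowest = offset < 0 ? newsp : sp;

    return lowest < splim;
}

int main(void)
{
    /* Push of 16 bytes crossing the limit: fires. */
    assert(v8m_sp_check_fires(0x20001000u, -16, 0x20000ff8u));
    /* Pop moving SP up and away from the limit: only the old SP is checked. */
    assert(!v8m_sp_check_fires(0x20001000u, 16, 0x20000ff8u));
    return 0;
}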
+ */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + for (i = 0; i < 8; i++) { if (insn & (1 << i)) { if (insn & (1 << 11)) { @@ -12423,6 +13022,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUARMState *env = cs->env_ptr; ARMCPU *cpu = arm_env_get_cpu(env); + dc->isar = &cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -12451,6 +13051,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags); dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && regime_is_secure(env, dc->mmu_idx); + dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags); dc->cp_regs = cpu->cp_regs; dc->features = env->features; @@ -12539,7 +13140,6 @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) tcg_gen_movi_i32(tmp, 0); store_cpu_field(tmp, condexec_bits); } - tcg_clear_temp_count(); } static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) @@ -12928,11 +13528,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) translator_loop(ops, &dc.base, cpu, tb); } -static const char *cpu_mode_names[16] = { - "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", - "???", "???", "hyp", "und", "???", "???", "???", "sys" -}; - void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -12998,7 +13593,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, psr & CPSR_V ? 'V' : '-', psr & CPSR_T ? 'T' : 'A', ns_status, - cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); + aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26); } if (flags & CPU_DUMP_FPU) { diff --git a/target/arm/translate.h b/target/arm/translate.h index 45f04244be..1550aa8bc7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -7,6 +7,7 @@ /* internal defines */ typedef struct DisasContext { DisasContextBase base; + const ARMISARegisters *isar; target_ulong pc; target_ulong page_start; @@ -38,6 +39,7 @@ typedef struct DisasContext { int vec_stride; bool v7m_handler_mode; bool v8m_secure; /* true if v8M and we're in Secure mode */ + bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */ /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI * so that top level loop can generate correct syndrome information. */ @@ -189,4 +191,24 @@ static inline TCGv_i32 get_ahp_flag(void) return ret; } + +/* Vector operations shared between ARM and AArch64. */ +extern const GVecGen3 bsl_op; +extern const GVecGen3 bit_op; +extern const GVecGen3 bif_op; +extern const GVecGen3 mla_op[4]; +extern const GVecGen3 mls_op[4]; +extern const GVecGen3 cmtst_op[4]; +extern const GVecGen2i ssra_op[4]; +extern const GVecGen2i usra_op[4]; +extern const GVecGen2i sri_op[4]; +extern const GVecGen2i sli_op[4]; +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +/* + * Forward to the isar_feature_* tests given a DisasContext pointer. 
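The dc_isar_feature() macro defined just below pairs token pasting with a GNU statement expression so the context argument is evaluated exactly once. A minimal stand-alone mirror of the pattern, using invented stand-ins (FakeISAR, FakeDC) and one of the real feature names from this patch:

#include <assert.h>
#include <stdbool.h>

typedef struct { bool sha1; } FakeISAR;            /* stand-in for ARMISARegisters */
typedef struct { const FakeISAR *isar; } FakeDC;   /* stand-in for DisasContext */

static bool isar_feature_aa32_sha1(const FakeISAR *id)
{
    return id->sha1;
}

/* Same shape as dc_isar_feature(): ## pastes the feature name onto the
 * predicate, and the ({ ... }) statement expression evaluates ctx once. */
#define fake_dc_isar_feature(name, ctx) \
    ({ FakeDC *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })

int main(void)
{
    FakeISAR id = { .sha1 = true };
    FakeDC dc = { .isar = &id };

    assert(fake_dc_isar_feature(aa32_sha1, &dc));
    return 0;
}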
+ */ +#define dc_isar_feature(name, ctx) \ + ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); }) + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/target/cris/translate.c b/target/cris/translate.c index 4ae1c04daf..11b2c11174 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -137,11 +137,7 @@ typedef struct DisasContext { static void gen_BUG(DisasContext *dc, const char *file, int line) { - fprintf(stderr, "BUG: pc=%x %s %d\n", dc->pc, file, line); - if (qemu_log_separate()) { - qemu_log("BUG: pc=%x %s %d\n", dc->pc, file, line); - } - cpu_abort(CPU(dc->cpu), "%s:%d\n", file, line); + cpu_abort(CPU(dc->cpu), "%s:%d pc=%x\n", file, line, dc->pc); } static const char *regnames_v32[] = diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index ab160c2a74..aecf3075f6 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -137,7 +137,8 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, if (unlikely(!(prot & type))) { /* The access isn't allowed -- Inst/Data Memory Protection Fault. */ - ret = (type & PAGE_EXEC ? EXCP_IMP : EXCP_DMP); + ret = (type & PAGE_EXEC ? EXCP_IMP : + prot & PAGE_READ ? EXCP_DMP : EXCP_DMAR); goto egress; } diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index 04678f5503..32bf966300 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -3,17 +3,20 @@ obj-$(CONFIG_TCG) += translate.o obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o -obj-$(CONFIG_KVM) += kvm.o hyperv.o -obj-$(CONFIG_SEV) += sev.o +ifeq ($(CONFIG_SOFTMMU),y) +obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o +obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o -# HAX support -ifdef CONFIG_WIN32 +obj-$(CONFIG_HYPERV) += hyperv.o +obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o +ifeq ($(CONFIG_WIN32),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o endif -ifdef CONFIG_DARWIN +ifeq ($(CONFIG_DARWIN),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o obj-$(CONFIG_HVF) += hvf/ endif obj-$(CONFIG_WHPX) += whpx-all.o +endif +obj-$(CONFIG_SEV) += sev.o +obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o diff --git a/target/i386/cpu.c b/target/i386/cpu.c index f24295e6e4..1469a1be01 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5123,14 +5123,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise * cs->nr_threads hasn't be populated yet and the checking is incorrect. */ - if (IS_AMD_CPU(env) && - !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && - cs->nr_threads > 1 && !ht_warned) { - error_report("This family of AMD CPU doesn't support " - "hyperthreading(%d). 
Please configure -smp " - "options properly or try enabling topoext feature.", - cs->nr_threads); - ht_warned = true; + if (IS_AMD_CPU(env) && + !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && + cs->nr_threads > 1 && !ht_warned) { + warn_report("This family of AMD CPU doesn't support " + "hyperthreading(%d)", + cs->nr_threads); + error_printf("Please configure -smp options properly" + " or try enabling topoext feature.\n"); + ht_warned = true; } x86_cpu_apic_realize(cpu, &local_err); @@ -5429,20 +5430,51 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) cpu->env.eip = tb->pc - tb->cs_base; } -static bool x86_cpu_has_work(CPUState *cs) +int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - return ((cs->interrupt_request & (CPU_INTERRUPT_HARD | - CPU_INTERRUPT_POLL)) && - (env->eflags & IF_MASK)) || - (cs->interrupt_request & (CPU_INTERRUPT_NMI | - CPU_INTERRUPT_INIT | - CPU_INTERRUPT_SIPI | - CPU_INTERRUPT_MCE)) || - ((cs->interrupt_request & CPU_INTERRUPT_SMI) && - !(env->hflags & HF_SMM_MASK)); +#if !defined(CONFIG_USER_ONLY) + if (interrupt_request & CPU_INTERRUPT_POLL) { + return CPU_INTERRUPT_POLL; + } +#endif + if (interrupt_request & CPU_INTERRUPT_SIPI) { + return CPU_INTERRUPT_SIPI; + } + + if (env->hflags2 & HF2_GIF_MASK) { + if ((interrupt_request & CPU_INTERRUPT_SMI) && + !(env->hflags & HF_SMM_MASK)) { + return CPU_INTERRUPT_SMI; + } else if ((interrupt_request & CPU_INTERRUPT_NMI) && + !(env->hflags2 & HF2_NMI_MASK)) { + return CPU_INTERRUPT_NMI; + } else if (interrupt_request & CPU_INTERRUPT_MCE) { + return CPU_INTERRUPT_MCE; + } else if ((interrupt_request & CPU_INTERRUPT_HARD) && + (((env->hflags2 & HF2_VINTR_MASK) && + (env->hflags2 & HF2_HIF_MASK)) || + (!(env->hflags2 & HF2_VINTR_MASK) && + (env->eflags & IF_MASK && + !(env->hflags & HF_INHIBIT_IRQ_MASK))))) { + return CPU_INTERRUPT_HARD; +#if !defined(CONFIG_USER_ONLY) + } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) && + (env->eflags & IF_MASK) && + !(env->hflags & HF_INHIBIT_IRQ_MASK)) { + return CPU_INTERRUPT_VIRQ; +#endif + } + } + + return 0; +} + +static bool x86_cpu_has_work(CPUState *cs) +{ + return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0; } static void x86_disas_set_info(CPUState *cs, disassemble_info *info) @@ -5533,6 +5565,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), @@ -5575,6 +5608,8 @@ static Property x86_cpu_properties[] = { * to the specific Windows version being used." 
*/ DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), + DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, + false), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b572a8e4aa..663f3a5e67 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -171,7 +171,7 @@ typedef enum X86Seg { #define HF_AC_SHIFT 18 /* must be same as eflags */ #define HF_SMM_SHIFT 19 /* CPU in SMM mode */ #define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */ -#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */ +#define HF_GUEST_SHIFT 21 /* SVM intercepts are active */ #define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */ #define HF_SMAP_SHIFT 23 /* CR4.SMAP */ #define HF_IOBPT_SHIFT 24 /* an io breakpoint enabled */ @@ -196,7 +196,7 @@ typedef enum X86Seg { #define HF_AC_MASK (1 << HF_AC_SHIFT) #define HF_SMM_MASK (1 << HF_SMM_SHIFT) #define HF_SVME_MASK (1 << HF_SVME_SHIFT) -#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT) +#define HF_GUEST_MASK (1 << HF_GUEST_SHIFT) #define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) #define HF_SMAP_MASK (1 << HF_SMAP_SHIFT) #define HF_IOBPT_MASK (1 << HF_IOBPT_SHIFT) @@ -1327,7 +1327,9 @@ typedef struct CPUX86State { bool tsc_valid; int64_t tsc_khz; int64_t user_tsc_khz; /* for sanity check only */ - void *kvm_xsave_buf; +#if defined(CONFIG_KVM) || defined(CONFIG_HVF) + void *xsave_buf; +#endif #if defined(CONFIG_HVF) HVFX86EmulatorState *hvf_emul; #endif @@ -1376,10 +1378,12 @@ struct X86CPU { bool hyperv_vpindex; bool hyperv_runtime; bool hyperv_synic; + bool hyperv_synic_kvm_only; bool hyperv_stimer; bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; @@ -1485,6 +1489,7 @@ extern struct VMStateDescription vmstate_x86_cpu; */ void x86_cpu_do_interrupt(CPUState *cpu); bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); +int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, int cpuid, void *opaque); diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c index 37a33d5ae0..49231f6b69 100644 --- a/target/i386/excp_helper.c +++ b/target/i386/excp_helper.c @@ -53,7 +53,7 @@ static int check_exception(CPUX86State *env, int intno, int *error_code, #if !defined(CONFIG_USER_ONLY) if (env->old_exception == EXCP08_DBLE) { - if (env->hflags & HF_SVMI_MASK) { + if (env->hflags & HF_GUEST_MASK) { cpu_vmexit(env, SVM_EXIT_SHUTDOWN, 0, retaddr); /* does not return */ } diff --git a/target/i386/hvf/README.md b/target/i386/hvf/README.md index 0d27a0d52b..2d33477aca 100644 --- a/target/i386/hvf/README.md +++ b/target/i386/hvf/README.md @@ -2,6 +2,6 @@ These sources (and ../hvf-all.c) are adapted from Veertu Inc's vdhh (Veertu Desktop Hosted Hypervisor) (last known location: https://github.com/veertuinc/vdhh) with some minor changes, the most significant of which were: -1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, kvm_xsave_buf) due to historical differences + QEMU needing to handle more emulation targets. +1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, xsave_buf) due to historical differences + QEMU needing to handle more emulation targets. 2. Removal of `apic_page` and hyperv-related functionality. 3. More relaxed use of `qemu_mutex_lock_iothread`. 
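Back in the target/i386/cpu.c hunk above, the CPU_INTERRUPT_HARD arm of x86_cpu_pending_interrupt() is the densest condition. A condensed restatement of just that test, reading HF2_VINTR_MASK as "SVM V_INTR_MASKING active" and HF2_HIF_MASK as "host IF copy" (my reading of the flag definitions, not text from the patch):

#include <assert.h>
#include <stdbool.h>

/* When is a physical (CPU_INTERRUPT_HARD) interrupt deliverable?  With SVM
 * V_INTR_MASKING the host's IF copy gates it; otherwise the guest's
 * EFLAGS.IF does, subject to the one-instruction STI/MOV-SS inhibit. */
static bool hard_irq_deliverable(bool vintr_masking, bool host_if,
                                 bool guest_if, bool inhibit)
{
    return vintr_masking ? host_if : (guest_if && !inhibit);
}

int main(void)
{
    assert(hard_irq_deliverable(true, true, false, false));
    assert(!hard_irq_deliverable(false, true, true, true));   /* inhibited */
    assert(hard_irq_deliverable(false, false, true, false));
    return 0;
}

Note this covers only the inner condition; the surrounding code additionally requires HF2_GIF_MASK before any of the SMI/NMI/MCE/HARD/VIRQ cases are considered.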
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index df69e6d0a7..e193022c03 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -72,9 +72,7 @@ #include "sysemu/sysemu.h" #include "target/i386/cpu.h" -pthread_rwlock_t mem_lock = PTHREAD_RWLOCK_INITIALIZER; HVFState *hvf_state; -int hvf_disabled = 1; static void assert_hvf_ok(hv_return_t ret) { @@ -587,7 +585,7 @@ int hvf_init_vcpu(CPUState *cpu) hvf_reset_vcpu(cpu); x86cpu = X86_CPU(cpu); - x86cpu->env.kvm_xsave_buf = qemu_memalign(4096, 4096); + x86cpu->env.xsave_buf = qemu_memalign(4096, 4096); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1); hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1); @@ -605,11 +603,6 @@ int hvf_init_vcpu(CPUState *cpu) return 0; } -void hvf_disable(int shouldDisable) -{ - hvf_disabled = shouldDisable; -} - static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -935,7 +928,7 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -static bool hvf_allowed; +bool hvf_allowed; static int hvf_accel_init(MachineState *ms) { @@ -943,7 +936,6 @@ static int hvf_accel_init(MachineState *ms) hv_return_t ret; HVFState *s; - hvf_disable(0); ret = hv_vm_create(HV_VM_DEFAULT); assert_hvf_ok(ret); diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 2d7540fe7c..2e33b69541 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -113,7 +113,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; - op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -263,16 +265,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -288,16 +290,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; 
decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -378,16 +380,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -402,8 +404,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -412,7 +414,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -639,8 +642,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -1686,7 +1689,8 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong ptr = 0; int which = 0; @@ -1698,7 +1702,7 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) switch (size) { case 1: - if (is_extended || reg < 4) { + if (is_extended || reg < 4 || rex) { which = 1; ptr = (target_ulong)&RL(env, reg); } else { @@ -1714,10 +1718,11 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong val = 0; - memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size); + memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); return val; } @@ -1739,7 +1744,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, if (base_reg == R_ESP || base_reg == R_EBP) { *sel = R_SS; } - base = 
get_reg_val(env, decode->sib.base, decode->rex.b, addr_size); + base = get_reg_val(env, decode->sib.base, decode->rex.rex, + decode->rex.b, addr_size); } if (decode->rex.x) { @@ -1747,7 +1753,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, } if (index_reg != R_ESP) { - scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) << + scaled_index = get_reg_val(env, index_reg, decode->rex.rex, + decode->rex.x, addr_size) << decode->sib.scale; } return base + scaled_index; @@ -1776,7 +1783,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) { seg = R_SS; } - ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size); + ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, addr_size); } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1805,7 +1813,8 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } else if (0 == mod && 5 == rm) { ptr = RIP(env) + decode->len + (int32_t) offset; } else { - ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset; + ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) + + (int64_t) offset; } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1822,8 +1831,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b, - decode->operand_size); + op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index 5ab6f31fa5..ef4bcab310 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size); -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size); +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size); +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op); target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 6c88939b96..df8e946fbc 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -75,7 +75,7 @@ void hvf_put_xsave(CPUState *cpu_state) struct X86XSaveArea *xsave; - xsave = X86_CPU(cpu_state)->env.kvm_xsave_buf; + xsave = X86_CPU(cpu_state)->env.xsave_buf; x86_cpu_xsave_all_areas(X86_CPU(cpu_state), xsave); @@ -163,7 +163,7 @@ void hvf_get_xsave(CPUState *cpu_state) { struct X86XSaveArea *xsave; - xsave = X86_CPU(cpu_state)->env.kvm_xsave_buf; + xsave = X86_CPU(cpu_state)->env.xsave_buf; if (hv_vcpu_read_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) { abort(); diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..8c572cd7c2 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -1,7 +1,7 @@ /* - * Definitions for Hyper-V guest/hypervisor interaction + * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part * - * Copyright (C) 2017 Parallels International GmbH + * Copyright (c) 2017-2018 Virtuozzo 
International GmbH. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -10,7 +10,7 @@ #ifndef TARGET_I386_HYPERV_PROTO_H #define TARGET_I386_HYPERV_PROTO_H -#include "qemu/bitmap.h" +#include "hw/hyperv/hyperv-proto.h" #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HV_CPUID_INTERFACE 0x40000001 @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* @@ -138,25 +139,6 @@ #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 /* - * Hypercall status code - */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * Hypercall numbers - */ -#define HV_POST_MESSAGE 0x005c -#define HV_SIGNAL_EVENT 0x005d -#define HV_HYPERCALL_FAST (1u << 16) - -/* * Hypercall MSR bits */ #define HV_HYPERCALL_ENABLE (1u << 0) @@ -165,7 +147,6 @@ * Synthetic interrupt controller definitions */ #define HV_SYNIC_VERSION 1 -#define HV_SINT_COUNT 16 #define HV_SYNIC_ENABLE (1u << 0) #define HV_SIMP_ENABLE (1u << 0) #define HV_SIEFP_ENABLE (1u << 0) @@ -175,94 +156,5 @@ #define HV_STIMER_COUNT 4 -/* - * Message size - */ -#define HV_MESSAGE_PAYLOAD_SIZE 240 - -/* - * Message types - */ -#define HV_MESSAGE_NONE 0x00000000 -#define HV_MESSAGE_VMBUS 0x00000001 -#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 -#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 -#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 -#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 -#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 -#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 -#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 -#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 -#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 -#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 -#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 -#define HV_MESSAGE_X64_APIC_EOI 0x80010004 -#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 - -/* - * Message flags - */ -#define HV_MESSAGE_FLAG_PENDING 0x1 - -/* - * Event flags number per SINT - */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) - -/* - * Connection id valid bits - */ -#define HV_CONNECTION_ID_MASK 0x00ffffff - -/* - * Input structure for POST_MESSAGE hypercall - */ -struct hyperv_post_message_input { - uint32_t connection_id; - uint32_t _reserved; - uint32_t message_type; - uint32_t payload_size; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -/* - * Input structure for SIGNAL_EVENT hypercall - */ -struct hyperv_signal_event_input { - uint32_t connection_id; - uint16_t flag_number; - uint16_t _reserved_zero; -}; - -/* - * SynIC message structures - */ -struct hyperv_message_header { - uint32_t message_type; - uint8_t payload_size; - uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ - uint8_t _reserved[2]; - uint64_t sender; -}; - -struct hyperv_message { - struct hyperv_message_header header; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -struct hyperv_message_page { - struct hyperv_message slot[HV_SINT_COUNT]; -}; - -/* - * SynIC event flags structures - */ -struct hyperv_event_flags { - DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); 
-}; - -struct hyperv_event_flags_page { - struct hyperv_event_flags slot[HV_SINT_COUNT]; -}; #endif diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c new file mode 100644 index 0000000000..fe548cbae2 --- /dev/null +++ b/target/i386/hyperv-stub.c @@ -0,0 +1,48 @@ +/* + * Stubs for CONFIG_HYPERV=n + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!cpu->hyperv_synic) { + return -1; + } + + /* + * Tracking the changes in the MSRs is unnecessary as there are no + * users for them beside save/load, which is handled nicely by the + * generic MSR save/load code + */ + return 0; + case KVM_EXIT_HYPERV_HCALL: + exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; + return 0; + default: + return -1; + } +} +#endif + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + return -ENOSYS; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ +} diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 3065d765ed..b264a28620 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -14,16 +14,36 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "hyperv.h" +#include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" -uint32_t hyperv_vp_index(X86CPU *cpu) +int hyperv_x86_synic_add(X86CPU *cpu) { - return CPU(cpu)->cpu_index; + hyperv_synic_add(CPU(cpu)); + return 0; } -X86CPU *hyperv_find_vcpu(uint32_t vp_index) +void hyperv_x86_synic_reset(X86CPU *cpu) { - return X86_CPU(qemu_get_cpu(vp_index)); + hyperv_synic_reset(CPU(cpu)); +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE; + hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ? + (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0; + hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ? + (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0; + hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); +} + +static void async_synic_update(CPUState *cs, run_on_cpu_data data) +{ + qemu_mutex_lock_iothread(); + hyperv_x86_synic_update(X86_CPU(cs)); + qemu_mutex_unlock_iothread(); } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) @@ -36,11 +56,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } - /* - * For now just track changes in SynIC control and msg/evt pages msr's. - * When SynIC messaging/events processing will be added in future - * here we will do messages queues flushing and pages remapping. - */ switch (exit->u.synic.msr) { case HV_X64_MSR_SCONTROL: env->msr_hv_synic_control = exit->u.synic.control; @@ -54,98 +69,33 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) default: return -1; } + + /* + * this will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. 
when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); + return 0; case KVM_EXIT_HYPERV_HCALL: { - uint16_t code; + uint16_t code = exit->u.hcall.input & 0xffff; + bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; + uint64_t param = exit->u.hcall.params[0]; - code = exit->u.hcall.input & 0xffff; switch (code) { case HV_POST_MESSAGE: + exit->u.hcall.result = hyperv_hcall_post_message(param, fast); + break; case HV_SIGNAL_EVENT: + exit->u.hcall.result = hyperv_hcall_signal_event(param, fast); + break; default: exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; - return 0; } + return 0; } default: return -1; } } - -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) -{ - HvSintRoute *sint_route = container_of(notifier, HvSintRoute, - sint_ack_notifier); - event_notifier_test_and_clear(notifier); - if (sint_route->sint_ack_clb) { - sint_route->sint_ack_clb(sint_route); - } -} - -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb) -{ - HvSintRoute *sint_route; - int r, gsi; - - sint_route = g_malloc0(sizeof(*sint_route)); - r = event_notifier_init(&sint_route->sint_set_notifier, false); - if (r) { - goto err; - } - - r = event_notifier_init(&sint_route->sint_ack_notifier, false); - if (r) { - goto err_sint_set_notifier; - } - - event_notifier_set_handler(&sint_route->sint_ack_notifier, - kvm_hv_sint_ack_handler); - - gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); - if (gsi < 0) { - goto err_gsi; - } - - r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - &sint_route->sint_ack_notifier, gsi); - if (r) { - goto err_irqfd; - } - sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->vp_index = vp_index; - sint_route->sint = sint; - - return sint_route; - -err_irqfd: - kvm_irqchip_release_virq(kvm_state, gsi); -err_gsi: - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); -err_sint_set_notifier: - event_notifier_cleanup(&sint_route->sint_set_notifier); -err: - g_free(sint_route); - - return NULL; -} - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) -{ - kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - sint_route->gsi); - kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); - event_notifier_cleanup(&sint_route->sint_set_notifier); - g_free(sint_route); -} - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route) -{ - return event_notifier_set(&sint_route->sint_set_notifier); -} diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 00c9b454bb..67543296c3 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -16,30 +16,14 @@ #include "cpu.h" #include "sysemu/kvm.h" -#include "qemu/event_notifier.h" - -typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(HvSintRoute *sint_route); - -struct HvSintRoute { - uint32_t sint; - uint32_t vp_index; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; -}; +#include "hw/hyperv/hyperv.h" +#ifdef CONFIG_KVM int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +#endif -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb 
sint_ack_clb); - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); - -uint32_t hyperv_vp_index(X86CPU *cpu); -X86CPU *hyperv_find_vcpu(uint32_t vp_index); +int hyperv_x86_synic_add(X86CPU *cpu); +void hyperv_x86_synic_reset(X86CPU *cpu); +void hyperv_x86_synic_update(X86CPU *cpu); #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 0b2a07d3a4..115d8b4c14 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_synic || cpu->hyperv_stimer || cpu->hyperv_reenlightenment || - cpu->hyperv_tlbflush); + cpu->hyperv_tlbflush || + cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -733,9 +734,20 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; } if (cpu->hyperv_synic) { - if (!has_msr_hv_synic || - kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { - fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); + unsigned int cap = KVM_CAP_HYPERV_SYNIC; + if (!cpu->hyperv_synic_kvm_only) { + if (!cpu->hyperv_vpindex) { + fprintf(stderr, "Hyper-V SynIC " + "(requested by 'hv-synic' cpu flag) " + "requires Hyper-V VP_INDEX ('hv-vpindex')\n"); + return -ENOSYS; + } + cap = KVM_CAP_HYPERV_SYNIC2; + } + + if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) { + fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) " + "is not supported by kernel\n"); return -ENOSYS; } @@ -753,12 +765,14 @@ static int hyperv_handle_properties(CPUState *cs) static int hyperv_init_vcpu(X86CPU *cpu) { + CPUState *cs = CPU(cpu); + int ret; + if (cpu->hyperv_vpindex && !hv_vpindex_settable) { /* * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - int ret; struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; @@ -767,18 +781,38 @@ static int hyperv_init_vcpu(X86CPU *cpu) .entries[0].index = HV_X64_MSR_VP_INDEX, }; - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); if (ret < 0) { return ret; } assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(cpu)) { + if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } } + if (cpu->hyperv_synic) { + uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? 
+ KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); + if (ret < 0) { + error_report("failed to turn on HyperV SynIC in KVM: %s", + strerror(-ret)); + return ret; + } + + if (!cpu->hyperv_synic_kvm_only) { + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } + } + } + return 0; } @@ -888,6 +922,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_CLUSTER_IPI_RECOMMENDED; + c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; @@ -1153,7 +1198,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (local_err) { error_report_err(local_err); error_free(invtsc_mig_blocker); - goto fail; + return r; } /* for savevm */ vmstate_x86_cpu.unmigratable = 1; @@ -1189,7 +1234,7 @@ int kvm_arch_init_vcpu(CPUState *cs) } if (has_xsave) { - env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); + env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); } cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); @@ -1226,6 +1271,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { env->msr_hv_synic_sint[i] = HV_SINT_MASKED; } + + hyperv_x86_synic_reset(cpu); } } @@ -1639,7 +1686,7 @@ ASSERT_OFFSET(XSAVE_PKRU, pkru_state); static int kvm_put_xsave(X86CPU *cpu) { CPUX86State *env = &cpu->env; - X86XSaveArea *xsave = env->kvm_xsave_buf; + X86XSaveArea *xsave = env->xsave_buf; if (!has_xsave) { return kvm_put_fpu(cpu); @@ -1937,7 +1984,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); } if (cpu->hyperv_vpindex && hv_vpindex_settable) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu)); + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, + hyperv_vp_index(CPU(cpu))); } if (cpu->hyperv_synic) { int j; @@ -2081,7 +2129,7 @@ static int kvm_get_fpu(X86CPU *cpu) static int kvm_get_xsave(X86CPU *cpu) { CPUX86State *env = &cpu->env; - X86XSaveArea *xsave = env->kvm_xsave_buf; + X86XSaveArea *xsave = env->xsave_buf; int ret; if (!has_xsave) { @@ -2686,7 +2734,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; - events.exception.pad = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; @@ -2695,7 +2742,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.nmi.injected = env->nmi_injected; events.nmi.pending = env->nmi_pending; events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); - events.nmi.pad = 0; events.sipi_vector = env->sipi_vector; events.flags = 0; @@ -3669,6 +3715,10 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, MSIMessage src, dst; X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); + if (!class->int_remap) { + return 0; + } + src.address = route->u.msi.address_hi; src.address <<= VTD_MSI_ADDR_HI_SHIFT; src.address |= route->u.msi.address_lo; diff --git a/target/i386/machine.c b/target/i386/machine.c index 084c2c73a8..225b5d433b 100644 --- 
a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,6 +7,7 @@ #include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" +#include "hyperv.h" #include "sysemu/kvm.h" @@ -672,11 +673,19 @@ static bool hyperv_synic_enable_needed(void *opaque) return false; } +static int hyperv_synic_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + hyperv_x86_synic_update(cpu); + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_synic = { .name = "cpu/msr_hyperv_synic", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_synic_enable_needed, + .post_load = hyperv_synic_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU), diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c index 30c26b9d9c..6cc53bcb40 100644 --- a/target/i386/mem_helper.c +++ b/target/i386/mem_helper.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) @@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, ra); - } else { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else + } else if (HAVE_CMPXCHG128) { int eflags = cpu_cc_compute_all(env, CC_OP); Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); @@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) eflags &= ~CC_Z; } CC_SRC = eflags; -#endif + } else { + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } #endif diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c index d1cbc6ebf0..33714bc6e1 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/seg_helper.c @@ -1244,7 +1244,7 @@ static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, } if (env->cr[0] & CR0_PE_MASK) { #if !defined(CONFIG_USER_ONLY) - if (env->hflags & HF_SVMI_MASK) { + if (env->hflags & HF_GUEST_MASK) { handle_even_inj(env, intno, is_int, error_code, is_hw, 0); } #endif @@ -1259,7 +1259,7 @@ static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, } } else { #if !defined(CONFIG_USER_ONLY) - if (env->hflags & HF_SVMI_MASK) { + if (env->hflags & HF_GUEST_MASK) { handle_even_inj(env, intno, is_int, error_code, is_hw, 1); } #endif @@ -1267,7 +1267,7 @@ static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, } #if !defined(CONFIG_USER_ONLY) - if (env->hflags & HF_SVMI_MASK) { + if (env->hflags & HF_GUEST_MASK) { CPUState *cs = CPU(cpu); uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, @@ -1319,74 +1319,66 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - bool ret = false; + int intno; + interrupt_request = x86_cpu_pending_interrupt(cs, interrupt_request); + if (!interrupt_request) { + return false; + } + + /* Don't process multiple interrupt requests in a single call. + * This is required to make icount-driven execution deterministic. + */ + switch (interrupt_request) { #if !defined(CONFIG_USER_ONLY) - if (interrupt_request & CPU_INTERRUPT_POLL) { + case CPU_INTERRUPT_POLL: cs->interrupt_request &= ~CPU_INTERRUPT_POLL; apic_poll_irq(cpu->apic_state); - /* Don't process multiple interrupt requests in a single call. - This is required to make icount-driven execution deterministic. 
*/ - return true; - } + break; #endif - if (interrupt_request & CPU_INTERRUPT_SIPI) { + case CPU_INTERRUPT_SIPI: do_cpu_sipi(cpu); - ret = true; - } else if (env->hflags2 & HF2_GIF_MASK) { - if ((interrupt_request & CPU_INTERRUPT_SMI) && - !(env->hflags & HF_SMM_MASK)) { - cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_SMI; - do_smm_enter(cpu); - ret = true; - } else if ((interrupt_request & CPU_INTERRUPT_NMI) && - !(env->hflags2 & HF2_NMI_MASK)) { - cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; - env->hflags2 |= HF2_NMI_MASK; - do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); - ret = true; - } else if (interrupt_request & CPU_INTERRUPT_MCE) { - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; - do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); - ret = true; - } else if ((interrupt_request & CPU_INTERRUPT_HARD) && - (((env->hflags2 & HF2_VINTR_MASK) && - (env->hflags2 & HF2_HIF_MASK)) || - (!(env->hflags2 & HF2_VINTR_MASK) && - (env->eflags & IF_MASK && - !(env->hflags & HF_INHIBIT_IRQ_MASK))))) { - int intno; - cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); - cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | - CPU_INTERRUPT_VIRQ); - intno = cpu_get_pic_interrupt(env); - qemu_log_mask(CPU_LOG_TB_IN_ASM, - "Servicing hardware INT=0x%02x\n", intno); - do_interrupt_x86_hardirq(env, intno, 1); - /* ensure that no TB jump will be modified as - the program flow was changed */ - ret = true; + break; + case CPU_INTERRUPT_SMI: + cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); + cs->interrupt_request &= ~CPU_INTERRUPT_SMI; + do_smm_enter(cpu); + break; + case CPU_INTERRUPT_NMI: + cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); + cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + env->hflags2 |= HF2_NMI_MASK; + do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); + break; + case CPU_INTERRUPT_MCE: + cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); + break; + case CPU_INTERRUPT_HARD: + cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); + cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | + CPU_INTERRUPT_VIRQ); + intno = cpu_get_pic_interrupt(env); + qemu_log_mask(CPU_LOG_TB_IN_ASM, + "Servicing hardware INT=0x%02x\n", intno); + do_interrupt_x86_hardirq(env, intno, 1); + break; #if !defined(CONFIG_USER_ONLY) - } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) && - (env->eflags & IF_MASK) && - !(env->hflags & HF_INHIBIT_IRQ_MASK)) { - int intno; - /* FIXME: this should respect TPR */ - cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0); - intno = x86_ldl_phys(cs, env->vm_vmcb + case CPU_INTERRUPT_VIRQ: + /* FIXME: this should respect TPR */ + cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0); + intno = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_vector)); - qemu_log_mask(CPU_LOG_TB_IN_ASM, - "Servicing virtual hardware INT=0x%02x\n", intno); - do_interrupt_x86_hardirq(env, intno, 1); - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; - ret = true; + qemu_log_mask(CPU_LOG_TB_IN_ASM, + "Servicing virtual hardware INT=0x%02x\n", intno); + do_interrupt_x86_hardirq(env, intno, 1); + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + break; #endif - } } - return ret; + /* Ensure that no TB jump will be modified as the program flow was changed. 
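/*
 * [Editor's illustration, not part of the patch] The rewritten
 * x86_cpu_exec_interrupt() above services exactly one pending cause per
 * call so that icount-driven execution stays deterministic; the pending
 * mask is first reduced to a single bit by x86_cpu_pending_interrupt().
 * A minimal stand-alone sketch of that selection step, using the same
 * priority order as the old if/else chain removed above:
 */
#include <stdint.h>

enum {
    IRQ_POLL = 1u << 0,
    IRQ_SIPI = 1u << 1,
    IRQ_SMI  = 1u << 2,
    IRQ_NMI  = 1u << 3,
    IRQ_MCE  = 1u << 4,
    IRQ_HARD = 1u << 5,
    IRQ_VIRQ = 1u << 6,
};

/* Return the single highest-priority pending cause, or 0 if none. */
static uint32_t pick_one_interrupt(uint32_t pending)
{
    static const uint32_t priority[] = {
        IRQ_POLL, IRQ_SIPI, IRQ_SMI, IRQ_NMI, IRQ_MCE, IRQ_HARD, IRQ_VIRQ,
    };
    for (unsigned i = 0; i < sizeof(priority) / sizeof(priority[0]); i++) {
        if (pending & priority[i]) {
            return priority[i];     /* service one cause, then return */
        }
    }
    return 0;
}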
*/ + return true; } void helper_lldt(CPUX86State *env, int selector) diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c index 342ece082f..9fd22a883b 100644 --- a/target/i386/svm_helper.c +++ b/target/i386/svm_helper.c @@ -228,7 +228,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) } /* enable intercepts */ - env->hflags |= HF_SVMI_MASK; + env->hflags |= HF_GUEST_MASK; env->tsc_offset = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset)); @@ -503,7 +503,7 @@ void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type, { CPUState *cs = CPU(x86_env_get_cpu(env)); - if (likely(!(env->hflags & HF_SVMI_MASK))) { + if (likely(!(env->hflags & HF_GUEST_MASK))) { return; } switch (type) { @@ -697,7 +697,7 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) /* Reload the host state from vm_hsave */ env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); - env->hflags &= ~HF_SVMI_MASK; + env->hflags &= ~HF_GUEST_MASK; env->intercept = 0; env->intercept_exceptions = 0; cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; diff --git a/target/i386/translate.c b/target/i386/translate.c index 1f9d1d9b24..83c1ebe491 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -72,27 +72,15 @@ //#define MACRO_TEST 1 /* global register indexes */ -static TCGv cpu_A0; -static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; static TCGv cpu_seg_base[6]; static TCGv_i64 cpu_bndl[4]; static TCGv_i64 cpu_bndu[4]; -/* local temps */ -static TCGv cpu_T0, cpu_T1; -/* local register indexes (only used inside old micro ops) */ -static TCGv cpu_tmp0, cpu_tmp4; -static TCGv_ptr cpu_ptr0, cpu_ptr1; -static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; -static TCGv_i64 cpu_tmp1_i64; #include "exec/gen-icount.h" -#ifdef TARGET_X86_64 -static int x86_64_hregs; -#endif - typedef struct DisasContext { DisasContextBase base; @@ -117,6 +105,9 @@ typedef struct DisasContext { int ss32; /* 32 bit stack segment */ CCOp cc_op; /* current CC operation */ bool cc_op_dirty; +#ifdef TARGET_X86_64 + bool x86_64_hregs; +#endif int addseg; /* non zero if either DS/ES/SS have a non zero base */ int f_st; /* currently unused */ int vm86; /* vm86 mode */ @@ -135,6 +126,22 @@ typedef struct DisasContext { int cpuid_ext3_features; int cpuid_7_0_ebx_features; int cpuid_xsave_features; + + /* TCG local temps */ + TCGv cc_srcT; + TCGv A0; + TCGv T0; + TCGv T1; + + /* TCG local register indexes (only used inside old micro ops) */ + TCGv tmp0; + TCGv tmp4; + TCGv_ptr ptr0; + TCGv_ptr ptr1; + TCGv_i32 tmp2_i32; + TCGv_i32 tmp3_i32; + TCGv_i64 tmp1_i64; + sigjmp_buf jmpbuf; } DisasContext; @@ -244,7 +251,7 @@ static void set_cc_op(DisasContext *s, CCOp op) tcg_gen_discard_tl(cpu_cc_src2); } if (dead & USES_CC_SRCT) { - tcg_gen_discard_tl(cpu_cc_srcT); + tcg_gen_discard_tl(s->cc_srcT); } if (op == CC_OP_DYNAMIC) { @@ -299,13 +306,13 @@ static void gen_update_cc_op(DisasContext *s) * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return * true for this special case, false otherwise. */ -static inline bool byte_reg_is_xH(int reg) +static inline bool byte_reg_is_xH(DisasContext *s, int reg) { if (reg < 4) { return false; } #ifdef TARGET_X86_64 - if (reg >= 8 || x86_64_hregs) { + if (reg >= 8 || s->x86_64_hregs) { return false; } #endif @@ -352,11 +359,11 @@ static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot) return b & 1 ? (ot == MO_16 ? 
MO_16 : MO_32) : MO_8; } -static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0) +static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0) { switch(ot) { case MO_8: - if (!byte_reg_is_xH(reg)) { + if (!byte_reg_is_xH(s, reg)) { tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8); } else { tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8); @@ -380,9 +387,10 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0) } } -static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg) +static inline +void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg) { - if (ot == MO_8 && byte_reg_is_xH(reg)) { + if (ot == MO_8 && byte_reg_is_xH(s, reg)) { tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8); } else { tcg_gen_mov_tl(t0, cpu_regs[reg]); @@ -391,9 +399,9 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg) static void gen_add_A0_im(DisasContext *s, int val) { - tcg_gen_addi_tl(cpu_A0, cpu_A0, val); + tcg_gen_addi_tl(s->A0, s->A0, val); if (!CODE64(s)) { - tcg_gen_ext32u_tl(cpu_A0, cpu_A0); + tcg_gen_ext32u_tl(s->A0, s->A0); } } @@ -402,16 +410,17 @@ static inline void gen_op_jmp_v(TCGv dest) tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip)); } -static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val) +static inline +void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val) { - tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); - gen_op_mov_reg_v(size, reg, cpu_tmp0); + tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val); + gen_op_mov_reg_v(s, size, reg, s->tmp0); } -static inline void gen_op_add_reg_T0(TCGMemOp size, int reg) +static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg) { - tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0); - gen_op_mov_reg_v(size, reg, cpu_tmp0); + tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0); + gen_op_mov_reg_v(s, size, reg, s->tmp0); } static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0) @@ -427,16 +436,16 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0) static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d) { if (d == OR_TMP0) { - gen_op_st_v(s, idx, cpu_T0, cpu_A0); + gen_op_st_v(s, idx, s->T0, s->A0); } else { - gen_op_mov_reg_v(idx, d, cpu_T0); + gen_op_mov_reg_v(s, idx, d, s->T0); } } -static inline void gen_jmp_im(target_ulong pc) +static inline void gen_jmp_im(DisasContext *s, target_ulong pc) { - tcg_gen_movi_tl(cpu_tmp0, pc); - gen_op_jmp_v(cpu_tmp0); + tcg_gen_movi_tl(s->tmp0, pc); + gen_op_jmp_v(s->tmp0); } /* Compute SEG:REG into A0. 
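/*
 * [Editor's illustration, not part of the patch] byte_reg_is_xH() above and
 * the extra "rex" argument threaded through the HVF decoder's get_reg_ref()
 * earlier in this diff encode the same x86-64 rule: without any REX prefix,
 * byte-register encodings 4..7 name AH/CH/DH/BH (bits 15:8 of registers
 * 0..3); once a REX prefix is present they name SPL/BPL/SIL/DIL instead.
 * A stand-alone sketch of the rule:
 */
#include <stdbool.h>

static bool byte_reg_is_high_half(int reg, bool any_rex_prefix)
{
    if (reg < 4 || reg >= 8) {
        return false;           /* AL..BL or R8B..R15B: always a low byte */
    }
    return !any_rex_prefix;     /* 4..7 mean AH/CH/DH/BH only without REX */
}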
SEG is selected from the override segment @@ -449,7 +458,7 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, #ifdef TARGET_X86_64 case MO_64: if (ovr_seg < 0) { - tcg_gen_mov_tl(cpu_A0, a0); + tcg_gen_mov_tl(s->A0, a0); return; } break; @@ -460,14 +469,14 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, ovr_seg = def_seg; } if (ovr_seg < 0) { - tcg_gen_ext32u_tl(cpu_A0, a0); + tcg_gen_ext32u_tl(s->A0, a0); return; } break; case MO_16: /* 16 bit address */ - tcg_gen_ext16u_tl(cpu_A0, a0); - a0 = cpu_A0; + tcg_gen_ext16u_tl(s->A0, a0); + a0 = s->A0; if (ovr_seg < 0) { if (s->addseg) { ovr_seg = def_seg; @@ -484,13 +493,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, TCGv seg = cpu_seg_base[ovr_seg]; if (aflag == MO_64) { - tcg_gen_add_tl(cpu_A0, a0, seg); + tcg_gen_add_tl(s->A0, a0, seg); } else if (CODE64(s)) { - tcg_gen_ext32u_tl(cpu_A0, a0); - tcg_gen_add_tl(cpu_A0, cpu_A0, seg); + tcg_gen_ext32u_tl(s->A0, a0); + tcg_gen_add_tl(s->A0, s->A0, seg); } else { - tcg_gen_add_tl(cpu_A0, a0, seg); - tcg_gen_ext32u_tl(cpu_A0, cpu_A0); + tcg_gen_add_tl(s->A0, a0, seg); + tcg_gen_ext32u_tl(s->A0, s->A0); } } } @@ -505,10 +514,10 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s) gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); } -static inline void gen_op_movl_T0_Dshift(TCGMemOp ot) +static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot) { - tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df)); - tcg_gen_shli_tl(cpu_T0, cpu_T0, ot); + tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df)); + tcg_gen_shli_tl(s->T0, s->T0, ot); }; static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign) @@ -552,18 +561,20 @@ static void gen_exts(TCGMemOp ot, TCGv reg) gen_ext_tl(reg, reg, ot, true); } -static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1) +static inline +void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1) { - tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]); - gen_extu(size, cpu_tmp0); - tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1); + tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); + gen_extu(size, s->tmp0); + tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1); } -static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1) +static inline +void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1) { - tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]); - gen_extu(size, cpu_tmp0); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); + tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); + gen_extu(size, s->tmp0); + tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); } static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n) @@ -606,28 +617,28 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip, target_ulong next_eip; if (s->pe && (s->cpl > s->iopl || s->vm86)) { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); switch (ot) { case MO_8: - gen_helper_check_iob(cpu_env, cpu_tmp2_i32); + gen_helper_check_iob(cpu_env, s->tmp2_i32); break; case MO_16: - gen_helper_check_iow(cpu_env, cpu_tmp2_i32); + gen_helper_check_iow(cpu_env, s->tmp2_i32); break; case MO_32: - gen_helper_check_iol(cpu_env, cpu_tmp2_i32); + gen_helper_check_iol(cpu_env, s->tmp2_i32); break; default: tcg_abort(); } } - if(s->flags & HF_SVMI_MASK) { + if(s->flags & HF_GUEST_MASK) { gen_update_cc_op(s); - gen_jmp_im(cur_eip); + gen_jmp_im(s, cur_eip); svm_flags |= (1 << (4 + ot)); next_eip = s->pc - s->cs_base; - 
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32, + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_svm_check_io(cpu_env, s->tmp2_i32, tcg_const_i32(svm_flags), tcg_const_i32(next_eip - cur_eip)); } @@ -636,42 +647,42 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip, static inline void gen_movs(DisasContext *s, TCGMemOp ot) { gen_string_movl_A0_ESI(s); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); gen_string_movl_A0_EDI(s); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_ESI); - gen_op_add_reg_T0(s->aflag, R_EDI); + gen_op_st_v(s, ot, s->T0, s->A0); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg_T0(s, s->aflag, R_EDI); } -static void gen_op_update1_cc(void) +static void gen_op_update1_cc(DisasContext *s) { - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); } -static void gen_op_update2_cc(void) +static void gen_op_update2_cc(DisasContext *s) { - tcg_gen_mov_tl(cpu_cc_src, cpu_T1); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); } -static void gen_op_update3_cc(TCGv reg) +static void gen_op_update3_cc(DisasContext *s, TCGv reg) { tcg_gen_mov_tl(cpu_cc_src2, reg); - tcg_gen_mov_tl(cpu_cc_src, cpu_T1); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); } -static inline void gen_op_testl_T0_T1_cc(void) +static inline void gen_op_testl_T0_T1_cc(DisasContext *s) { - tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1); + tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1); } -static void gen_op_update_neg_cc(void) +static void gen_op_update_neg_cc(DisasContext *s) { - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_neg_tl(cpu_cc_src, cpu_T0); - tcg_gen_movi_tl(cpu_cc_srcT, 0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_neg_tl(cpu_cc_src, s->T0); + tcg_gen_movi_tl(s->cc_srcT, 0); } /* compute all eflags to cc_src */ @@ -739,17 +750,17 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_SUBB ... CC_OP_SUBQ: /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_SUBB; - t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); /* If no temporary was used, be careful not to alias t1 and t0. */ - t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg; - tcg_gen_mov_tl(t0, cpu_cc_srcT); + t0 = t1 == cpu_cc_src ? s->tmp0 : reg; + tcg_gen_mov_tl(t0, s->cc_srcT); gen_extu(size, t0); goto add_sub; case CC_OP_ADDB ... 
CC_OP_ADDQ: /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_ADDB; - t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); add_sub: return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0, @@ -899,10 +910,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: - tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); - gen_extu(size, cpu_tmp4); - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); - cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4, + tcg_gen_mov_tl(s->tmp4, s->cc_srcT); + gen_extu(size, s->tmp4); + t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); + cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4, .reg2 = t0, .mask = -1, .use_reg2 = true }; break; @@ -912,10 +923,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) case JCC_LE: cond = TCG_COND_LE; fast_jcc_l: - tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); - gen_exts(size, cpu_tmp4); - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); - cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4, + tcg_gen_mov_tl(s->tmp4, s->cc_srcT); + gen_exts(size, s->tmp4); + t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true); + cc = (CCPrepare) { .cond = cond, .reg = s->tmp4, .reg2 = t0, .mask = -1, .use_reg2 = true }; break; @@ -951,7 +962,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) case JCC_L: gen_compute_eflags(s); if (reg == cpu_cc_src) { - reg = cpu_tmp0; + reg = s->tmp0; } tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ tcg_gen_xor_tl(reg, reg, cpu_cc_src); @@ -962,7 +973,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) case JCC_LE: gen_compute_eflags(s); if (reg == cpu_cc_src) { - reg = cpu_tmp0; + reg = s->tmp0; } tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ tcg_gen_xor_tl(reg, reg, cpu_cc_src); @@ -1018,11 +1029,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) value 'b'. In the fast case, T0 is guaranted not to be used. */ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) { - CCPrepare cc = gen_prepare_cc(s, b, cpu_T0); + CCPrepare cc = gen_prepare_cc(s, b, s->T0); if (cc.mask != -1) { - tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask); - cc.reg = cpu_T0; + tcg_gen_andi_tl(s->T0, cc.reg, cc.mask); + cc.reg = s->T0; } if (cc.use_reg2) { tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); @@ -1036,12 +1047,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) A translation block must end soon. 
*/ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) { - CCPrepare cc = gen_prepare_cc(s, b, cpu_T0); + CCPrepare cc = gen_prepare_cc(s, b, s->T0); gen_update_cc_op(s); if (cc.mask != -1) { - tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask); - cc.reg = cpu_T0; + tcg_gen_andi_tl(s->T0, cc.reg, cc.mask); + cc.reg = s->T0; } set_cc_op(s, CC_OP_DYNAMIC); if (cc.use_reg2) { @@ -1057,7 +1068,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); - gen_op_jnz_ecx(s->aflag, l1); + gen_op_jnz_ecx(s, s->aflag, l1); gen_set_label(l2); gen_jmp_tb(s, next_eip, 1); gen_set_label(l1); @@ -1066,40 +1077,40 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) static inline void gen_stos(DisasContext *s, TCGMemOp ot) { - gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); gen_string_movl_A0_EDI(s); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_EDI); + gen_op_st_v(s, ot, s->T0, s->A0); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_EDI); } static inline void gen_lods(DisasContext *s, TCGMemOp ot) { gen_string_movl_A0_ESI(s); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); - gen_op_mov_reg_v(ot, R_EAX, cpu_T0); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_ESI); + gen_op_ld_v(s, ot, s->T0, s->A0); + gen_op_mov_reg_v(s, ot, R_EAX, s->T0); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_ESI); } static inline void gen_scas(DisasContext *s, TCGMemOp ot) { gen_string_movl_A0_EDI(s); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); gen_op(s, OP_CMPL, ot, R_EAX); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_EDI); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_EDI); } static inline void gen_cmps(DisasContext *s, TCGMemOp ot) { gen_string_movl_A0_EDI(s); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); gen_string_movl_A0_ESI(s); gen_op(s, OP_CMPL, ot, OR_TMP0); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_ESI); - gen_op_add_reg_T0(s->aflag, R_EDI); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg_T0(s, s->aflag, R_EDI); } static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) @@ -1123,15 +1134,15 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot) gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. 
*/ - tcg_gen_movi_tl(cpu_T0, 0); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); - gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_EDI); - gen_bpt_io(s, cpu_tmp2_i32, ot); + tcg_gen_movi_tl(s->T0, 0); + gen_op_st_v(s, ot, s->T0, s->A0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); + gen_helper_in_func(ot, s->T0, s->tmp2_i32); + gen_op_st_v(s, ot, s->T0, s->A0); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); } @@ -1143,15 +1154,15 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot) gen_io_start(); } gen_string_movl_A0_ESI(s); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); - - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0); - gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); - gen_op_movl_T0_Dshift(ot); - gen_op_add_reg_T0(s->aflag, R_ESI); - gen_bpt_io(s, cpu_tmp2_i32, ot); + gen_op_ld_v(s, ot, s->T0, s->A0); + + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); + gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); + gen_op_movl_T0_Dshift(s, ot); + gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); } @@ -1167,11 +1178,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ gen_update_cc_op(s); \ l2 = gen_jz_ecx_string(s, next_eip); \ gen_ ## op(s, ot); \ - gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \ /* a loop would cause two single step exceptions if ECX = 1 \ before rep string_insn */ \ if (s->repz_opt) \ - gen_op_jz_ecx(s->aflag, l2); \ + gen_op_jz_ecx(s, s->aflag, l2); \ gen_jmp(s, cur_eip); \ } @@ -1185,11 +1196,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ gen_update_cc_op(s); \ l2 = gen_jz_ecx_string(s, next_eip); \ gen_ ## op(s, ot); \ - gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \ gen_update_cc_op(s); \ gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ if (s->repz_opt) \ - gen_op_jz_ecx(s->aflag, l2); \ + gen_op_jz_ecx(s, s->aflag, l2); \ gen_jmp(s, cur_eip); \ } @@ -1261,103 +1272,103 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg) static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) { if (d != OR_TMP0) { - gen_op_mov_v_reg(ot, cpu_T0, d); + gen_op_mov_v_reg(s1, ot, s1->T0, d); } else if (!(s1->prefix & PREFIX_LOCK)) { - gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s1, ot, s1->T0, s1->A0); } switch(op) { case OP_ADCL: - gen_compute_eflags_c(s1, cpu_tmp4); + gen_compute_eflags_c(s1, s1->tmp4); if (s1->prefix & PREFIX_LOCK) { - tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1); - tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1); + tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, s1->mem_index, ot | MO_LE); } else { - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4); + tcg_gen_add_tl(s1->T0, s1->T0, s1->T1); + tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4); gen_op_st_rm_T0_A0(s1, ot, d); } - 
gen_op_update3_cc(cpu_tmp4); + gen_op_update3_cc(s1, s1->tmp4); set_cc_op(s1, CC_OP_ADCB + ot); break; case OP_SBBL: - gen_compute_eflags_c(s1, cpu_tmp4); + gen_compute_eflags_c(s1, s1->tmp4); if (s1->prefix & PREFIX_LOCK) { - tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4); - tcg_gen_neg_tl(cpu_T0, cpu_T0); - tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4); + tcg_gen_neg_tl(s1->T0, s1->T0); + tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, s1->mem_index, ot | MO_LE); } else { - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4); + tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1); + tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update3_cc(cpu_tmp4); + gen_op_update3_cc(s1, s1->tmp4); set_cc_op(s1, CC_OP_SBBB + ot); break; case OP_ADDL: if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1, s1->mem_index, ot | MO_LE); } else { - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_add_tl(s1->T0, s1->T0, s1->T1); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update2_cc(); + gen_op_update2_cc(s1); set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: if (s1->prefix & PREFIX_LOCK) { - tcg_gen_neg_tl(cpu_T0, cpu_T1); - tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0, + tcg_gen_neg_tl(s1->T0, s1->T1); + tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0, s1->mem_index, ot | MO_LE); - tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1); + tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1); } else { - tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_mov_tl(s1->cc_srcT, s1->T0); + tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update2_cc(); + gen_op_update2_cc(s1); set_cc_op(s1, CC_OP_SUBB + ot); break; default: case OP_ANDL: if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1, s1->mem_index, ot | MO_LE); } else { - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_and_tl(s1->T0, s1->T0, s1->T1); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update1_cc(); + gen_op_update1_cc(s1); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_ORL: if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1, s1->mem_index, ot | MO_LE); } else { - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_or_tl(s1->T0, s1->T0, s1->T1); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update1_cc(); + gen_op_update1_cc(s1); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_XORL: if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1, s1->mem_index, ot | MO_LE); } else { - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1); gen_op_st_rm_T0_A0(s1, ot, d); } - gen_op_update1_cc(); + gen_op_update1_cc(s1); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_CMPL: - tcg_gen_mov_tl(cpu_cc_src, cpu_T1); - tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); - tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1); + tcg_gen_mov_tl(cpu_cc_src, s1->T1); + tcg_gen_mov_tl(s1->cc_srcT, s1->T0); + tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1); set_cc_op(s1, CC_OP_SUBB + ot); break; } @@ -1367,21 +1378,21 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) { if (s1->prefix & 
PREFIX_LOCK) { - tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1); - tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1); + tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, s1->mem_index, ot | MO_LE); } else { if (d != OR_TMP0) { - gen_op_mov_v_reg(ot, cpu_T0, d); + gen_op_mov_v_reg(s1, ot, s1->T0, d); } else { - gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s1, ot, s1->T0, s1->A0); } - tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1)); + tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1)); gen_op_st_rm_T0_A0(s1, ot, d); } gen_compute_eflags_c(s1, cpu_cc_src); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_dst, s1->T0); set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot); } @@ -1410,19 +1421,19 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, tcg_temp_free(z_tl); /* Get the two potential CC_OP values into temporaries. */ - tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); + tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); if (s->cc_op == CC_OP_DYNAMIC) { oldop = cpu_cc_op; } else { - tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op); - oldop = cpu_tmp3_i32; + tcg_gen_movi_i32(s->tmp3_i32, s->cc_op); + oldop = s->tmp3_i32; } /* Conditionally store the CC_OP value. */ z32 = tcg_const_i32(0); s32 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(s32, count); - tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop); tcg_temp_free_i32(z32); tcg_temp_free_i32(s32); @@ -1437,33 +1448,33 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1, /* load */ if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); } - tcg_gen_andi_tl(cpu_T1, cpu_T1, mask); - tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1); + tcg_gen_andi_tl(s->T1, s->T1, mask); + tcg_gen_subi_tl(s->tmp0, s->T1, 1); if (is_right) { if (is_arith) { - gen_exts(ot, cpu_T0); - tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0); - tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1); + gen_exts(ot, s->T0); + tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_sar_tl(s->T0, s->T0, s->T1); } else { - gen_extu(ot, cpu_T0); - tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0); - tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1); + gen_extu(ot, s->T0); + tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_tl(s->T0, s->T0, s->T1); } } else { - tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0); - tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_tl(s->T0, s->T0, s->T1); } /* store */ gen_op_st_rm_T0_A0(s, ot, op1); - gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right); + gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right); } static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, @@ -1473,25 +1484,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, /* load */ if (op1 == OR_TMP0) - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); else - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); op2 &= mask; if (op2 != 0) { if (is_right) { if (is_arith) { - gen_exts(ot, cpu_T0); - tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1); - tcg_gen_sari_tl(cpu_T0, cpu_T0, op2); + gen_exts(ot, s->T0); + tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1); + tcg_gen_sari_tl(s->T0, s->T0, op2); } else { - gen_extu(ot, cpu_T0); - 
tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1); - tcg_gen_shri_tl(cpu_T0, cpu_T0, op2); + gen_extu(ot, s->T0); + tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1); + tcg_gen_shri_tl(s->T0, s->T0, op2); } } else { - tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1); - tcg_gen_shli_tl(cpu_T0, cpu_T0, op2); + tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1); + tcg_gen_shli_tl(s->T0, s->T0, op2); } } @@ -1500,8 +1511,8 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, /* update eflags if non zero shift */ if (op2 != 0) { - tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, s->tmp4); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); } } @@ -1513,41 +1524,41 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right) /* load */ if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); } - tcg_gen_andi_tl(cpu_T1, cpu_T1, mask); + tcg_gen_andi_tl(s->T1, s->T1, mask); switch (ot) { case MO_8: /* Replicate the 8-bit input so that a 32-bit rotate works. */ - tcg_gen_ext8u_tl(cpu_T0, cpu_T0); - tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101); + tcg_gen_ext8u_tl(s->T0, s->T0); + tcg_gen_muli_tl(s->T0, s->T0, 0x01010101); goto do_long; case MO_16: /* Replicate the 16-bit input so that a 32-bit rotate works. */ - tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16); + tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16); goto do_long; do_long: #ifdef TARGET_X86_64 case MO_32: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); if (is_right) { - tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); } else { - tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); } - tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); break; #endif default: if (is_right) { - tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_rotr_tl(s->T0, s->T0, s->T1); } else { - tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_rotl_tl(s->T0, s->T0, s->T1); } break; } @@ -1563,12 +1574,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right) since we've computed the flags into CC_SRC, these variables are currently dead. */ if (is_right) { - tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1); - tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask); + tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask); tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1); } else { - tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1); + tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask); + tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1); } tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); @@ -1579,11 +1590,11 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right) exactly as we computed above. 
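/*
 * [Editor's illustration, not part of the patch] The flag computation just
 * above extracts CF into CC_DST and OF into CC_SRC2 after a rotate.  A
 * stand-alone model of the same arithmetic for a 32-bit result "res":
 */
#include <stdint.h>

static void rot32_flags(uint32_t res, int is_right, int *cf, int *of)
{
    if (is_right) {
        *cf = (res >> 31) & 1;              /* CF = MSB of the result */
        *of = *cf ^ ((res >> 30) & 1);      /* OF = top two result bits XORed */
    } else {
        *cf = res & 1;                      /* CF = LSB of the result */
        *of = *cf ^ ((res >> 31) & 1);      /* OF = CF XOR MSB of the result */
    }
}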
*/ t0 = tcg_const_i32(0); t1 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(t1, cpu_T1); - tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); - tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS); + tcg_gen_trunc_tl_i32(t1, s->T1); + tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX); + tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS); tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0, - cpu_tmp2_i32, cpu_tmp3_i32); + s->tmp2_i32, s->tmp3_i32); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -1599,9 +1610,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, /* load */ if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); } op2 &= mask; @@ -1609,20 +1620,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, switch (ot) { #ifdef TARGET_X86_64 case MO_32: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); if (is_right) { - tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2); } else { - tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2); } - tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); break; #endif default: if (is_right) { - tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2); + tcg_gen_rotri_tl(s->T0, s->T0, op2); } else { - tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2); + tcg_gen_rotli_tl(s->T0, s->T0, op2); } break; case MO_8: @@ -1635,10 +1646,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, if (is_right) { shift = mask + 1 - shift; } - gen_extu(ot, cpu_T0); - tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift); - tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift); - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); + gen_extu(ot, s->T0); + tcg_gen_shli_tl(s->tmp0, s->T0, shift); + tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift); + tcg_gen_or_tl(s->T0, s->T0, s->tmp0); break; } } @@ -1655,12 +1666,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, since we've computed the flags into CC_SRC, these variables are currently dead. 
*/ if (is_right) { - tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1); - tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask); + tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask); tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1); } else { - tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1); + tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask); + tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1); } tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); @@ -1677,24 +1688,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1, /* load */ if (op1 == OR_TMP0) - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); else - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); if (is_right) { switch (ot) { case MO_8: - gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1); break; case MO_16: - gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1); break; case MO_32: - gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1); break; #endif default: @@ -1703,17 +1714,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1, } else { switch (ot) { case MO_8: - gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1); break; case MO_16: - gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1); break; case MO_32: - gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1); + gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1); break; #endif default: @@ -1733,9 +1744,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1, /* load */ if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_v_reg(ot, cpu_T0, op1); + gen_op_mov_v_reg(s, ot, s->T0, op1); } count = tcg_temp_new(); @@ -1747,69 +1758,69 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1, This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A portion by constructing it as a 32-bit value. */ if (is_right) { - tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16); - tcg_gen_mov_tl(cpu_T1, cpu_T0); - tcg_gen_mov_tl(cpu_T0, cpu_tmp0); + tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16); + tcg_gen_mov_tl(s->T1, s->T0); + tcg_gen_mov_tl(s->T0, s->tmp0); } else { - tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16); + tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16); } /* FALLTHRU */ #ifdef TARGET_X86_64 case MO_32: /* Concatenate the two 32-bit values and use a 64-bit shift. 
*/ - tcg_gen_subi_tl(cpu_tmp0, count, 1); + tcg_gen_subi_tl(s->tmp0, count, 1); if (is_right) { - tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0); - tcg_gen_shr_i64(cpu_T0, cpu_T0, count); + tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1); + tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_i64(s->T0, s->T0, count); } else { - tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0); - tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0); - tcg_gen_shl_i64(cpu_T0, cpu_T0, count); - tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32); - tcg_gen_shri_i64(cpu_T0, cpu_T0, 32); + tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0); + tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_i64(s->T0, s->T0, count); + tcg_gen_shri_i64(s->tmp0, s->tmp0, 32); + tcg_gen_shri_i64(s->T0, s->T0, 32); } break; #endif default: - tcg_gen_subi_tl(cpu_tmp0, count, 1); + tcg_gen_subi_tl(s->tmp0, count, 1); if (is_right) { - tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0); + tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0); - tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); - tcg_gen_shr_tl(cpu_T0, cpu_T0, count); - tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4); + tcg_gen_subfi_tl(s->tmp4, mask + 1, count); + tcg_gen_shr_tl(s->T0, s->T0, count); + tcg_gen_shl_tl(s->T1, s->T1, s->tmp4); } else { - tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0); + tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0); if (ot == MO_16) { /* Only needed if count > 16, for Intel behaviour. */ - tcg_gen_subfi_tl(cpu_tmp4, 33, count); - tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4); - tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4); + tcg_gen_subfi_tl(s->tmp4, 33, count); + tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4); + tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4); } - tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); - tcg_gen_shl_tl(cpu_T0, cpu_T0, count); - tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4); + tcg_gen_subfi_tl(s->tmp4, mask + 1, count); + tcg_gen_shl_tl(s->T0, s->T0, count); + tcg_gen_shr_tl(s->T1, s->T1, s->tmp4); } - tcg_gen_movi_tl(cpu_tmp4, 0); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4, - cpu_tmp4, cpu_T1); - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_movi_tl(s->tmp4, 0); + tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4, + s->tmp4, s->T1); + tcg_gen_or_tl(s->T0, s->T0, s->T1); break; } /* store */ gen_op_st_rm_T0_A0(s, ot, op1); - gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right); + gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right); tcg_temp_free(count); } static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s) { if (s != OR_TMP1) - gen_op_mov_v_reg(ot, cpu_T1, s); + gen_op_mov_v_reg(s1, ot, s1->T1, s); switch(op) { case OP_ROL: gen_rot_rm_T1(s1, ot, d, 0); @@ -1857,7 +1868,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c) break; default: /* currently not optimized */ - tcg_gen_movi_tl(cpu_T1, c); + tcg_gen_movi_tl(s1->T1, c); gen_shift(s1, op, ot, d, OR_TMP1); break; } @@ -2048,7 +2059,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s, } /* Compute the address, with a minimum number of TCG ops. 
*/ -static TCGv gen_lea_modrm_1(AddressParts a) +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) { TCGv ea = NULL; @@ -2056,22 +2067,22 @@ static TCGv gen_lea_modrm_1(AddressParts a) if (a.scale == 0) { ea = cpu_regs[a.index]; } else { - tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale); - ea = cpu_A0; + tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale); + ea = s->A0; } if (a.base >= 0) { - tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]); - ea = cpu_A0; + tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]); + ea = s->A0; } } else if (a.base >= 0) { ea = cpu_regs[a.base]; } if (!ea) { - tcg_gen_movi_tl(cpu_A0, a.disp); - ea = cpu_A0; + tcg_gen_movi_tl(s->A0, a.disp); + ea = s->A0; } else if (a.disp != 0) { - tcg_gen_addi_tl(cpu_A0, ea, a.disp); - ea = cpu_A0; + tcg_gen_addi_tl(s->A0, ea, a.disp); + ea = s->A0; } return ea; @@ -2080,7 +2091,7 @@ static TCGv gen_lea_modrm_1(AddressParts a) static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(a); + TCGv ea = gen_lea_modrm_1(s, a); gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); } @@ -2093,21 +2104,21 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, TCGCond cond, TCGv_i64 bndv) { - TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm)); + TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm)); - tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea); + tcg_gen_extu_tl_i64(s->tmp1_i64, ea); if (!CODE64(s)) { - tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64); + tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64); } - tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv); - tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64); - gen_helper_bndck(cpu_env, cpu_tmp2_i32); + tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv); + tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64); + gen_helper_bndck(cpu_env, s->tmp2_i32); } /* used for LEA and MOV AX, mem */ static void gen_add_A0_ds_seg(DisasContext *s) { - gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override); + gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); } /* generate modrm memory load or store of 'reg'. 
TMP0 is used if reg == @@ -2122,23 +2133,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, if (mod == 3) { if (is_store) { if (reg != OR_TMP0) - gen_op_mov_v_reg(ot, cpu_T0, reg); - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_v_reg(s, ot, s->T0, reg); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); if (reg != OR_TMP0) - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); } } else { gen_lea_modrm(env, s, modrm); if (is_store) { if (reg != OR_TMP0) - gen_op_mov_v_reg(ot, cpu_T0, reg); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_mov_v_reg(s, ot, s->T0, reg); + gen_op_st_v(s, ot, s->T0, s->A0); } else { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); if (reg != OR_TMP0) - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); } } } @@ -2192,13 +2203,13 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) if (use_goto_tb(s, pc)) { /* jump to same page: we can use a direct jump */ tcg_gen_goto_tb(tb_num); - gen_jmp_im(eip); + gen_jmp_im(s, eip); tcg_gen_exit_tb(s->base.tb, tb_num); s->base.is_jmp = DISAS_NORETURN; } else { /* jump to another page */ - gen_jmp_im(eip); - gen_jr(s, cpu_tmp0); + gen_jmp_im(s, eip); + gen_jr(s, s->tmp0); } } @@ -2220,11 +2231,11 @@ static inline void gen_jcc(DisasContext *s, int b, l2 = gen_new_label(); gen_jcc1(s, b, l1); - gen_jmp_im(next_eip); + gen_jmp_im(s, next_eip); tcg_gen_br(l2); gen_set_label(l1); - gen_jmp_im(val); + gen_jmp_im(s, val); gen_set_label(l2); gen_eob(s); } @@ -2237,7 +2248,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - cc = gen_prepare_cc(s, b, cpu_T1); + cc = gen_prepare_cc(s, b, s->T1); if (cc.mask != -1) { TCGv t0 = tcg_temp_new(); tcg_gen_andi_tl(t0, cc.reg, cc.mask); @@ -2247,9 +2258,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b, cc.reg2 = tcg_const_tl(cc.imm); } - tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2, - cpu_T0, cpu_regs[reg]); - gen_op_mov_reg_v(ot, reg, cpu_T0); + tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2, + s->T0, cpu_regs[reg]); + gen_op_mov_reg_v(s, ot, reg, s->T0); if (cc.mask != -1) { tcg_temp_free(cc.reg); @@ -2259,18 +2270,18 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b, } } -static inline void gen_op_movl_T0_seg(int seg_reg) +static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg) { - tcg_gen_ld32u_tl(cpu_T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,segs[seg_reg].selector)); } -static inline void gen_op_movl_seg_T0_vm(int seg_reg) +static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg) { - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); - tcg_gen_st32_tl(cpu_T0, cpu_env, + tcg_gen_ext16u_tl(s->T0, s->T0); + tcg_gen_st32_tl(s->T0, cpu_env, offsetof(CPUX86State,segs[seg_reg].selector)); - tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4); + tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4); } /* move T0 to seg_reg and compute if the CPU state may change. 
Never @@ -2278,8 +2289,8 @@ static inline void gen_op_movl_seg_T0_vm(int seg_reg) static void gen_movl_seg_T0(DisasContext *s, int seg_reg) { if (s->pe && !s->vm86) { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32); /* abort translation because the addseg value may change or because ss32 may change. For R_SS, translation must always stop as a special handling must be done to disable hardware @@ -2288,7 +2299,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg) s->base.is_jmp = DISAS_TOO_MANY; } } else { - gen_op_movl_seg_T0_vm(seg_reg); + gen_op_movl_seg_T0_vm(s, seg_reg); if (seg_reg == R_SS) { s->base.is_jmp = DISAS_TOO_MANY; } @@ -2305,10 +2316,10 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, uint32_t type, uint64_t param) { /* no SVM activated; fast case */ - if (likely(!(s->flags & HF_SVMI_MASK))) + if (likely(!(s->flags & HF_GUEST_MASK))) return; gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type), tcg_const_i64(param)); } @@ -2321,7 +2332,7 @@ gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type) static inline void gen_stack_update(DisasContext *s, int addend) { - gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend); + gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend); } /* Generate a push. It depends on ss32, addseg and dflag. */ @@ -2330,20 +2341,20 @@ static void gen_push_v(DisasContext *s, TCGv val) TCGMemOp d_ot = mo_pushpop(s, s->dflag); TCGMemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; - TCGv new_esp = cpu_A0; + TCGv new_esp = s->A0; - tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size); + tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size); if (!CODE64(s)) { if (s->addseg) { - new_esp = cpu_tmp4; - tcg_gen_mov_tl(new_esp, cpu_A0); + new_esp = s->tmp4; + tcg_gen_mov_tl(new_esp, s->A0); } - gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1); + gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); } - gen_op_st_v(s, d_ot, val, cpu_A0); - gen_op_mov_reg_v(a_ot, R_ESP, new_esp); + gen_op_st_v(s, d_ot, val, s->A0); + gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp); } /* two step pop is necessary for precise exceptions */ @@ -2352,7 +2363,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s) TCGMemOp d_ot = mo_pushpop(s, s->dflag); gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); - gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, d_ot, s->T0, s->A0); return d_ot; } @@ -2375,9 +2386,9 @@ static void gen_pusha(DisasContext *s) int i; for (i = 0; i < 8; i++) { - tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size); - gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1); - gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0); + tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size); + gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0); } gen_stack_update(s, -8 * size); @@ -2395,10 +2406,10 @@ static void gen_popa(DisasContext *s) if (7 - i == R_ESP) { continue; } - tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size); - gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1); - gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0); - gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0); + tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size); + gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_op_ld_v(s, d_ot, s->T0, s->A0); + gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0); } gen_stack_update(s, 8 
* size); @@ -2411,9 +2422,9 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) int size = 1 << d_ot; /* Push BP; compute FrameTemp into T1. */ - tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size); - gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1); - gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0); + tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size); + gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1); + gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0); level &= 31; if (level != 0) { @@ -2421,27 +2432,27 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) /* Copy level-1 pointers from the previous frame. */ for (i = 1; i < level; ++i) { - tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i); - gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1); - gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0); + tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i); + gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_op_ld_v(s, d_ot, s->tmp0, s->A0); - tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i); - gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1); - gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0); + tcg_gen_subi_tl(s->A0, s->T1, size * i); + gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_op_st_v(s, d_ot, s->tmp0, s->A0); } /* Push the current FrameTemp as the last level. */ - tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level); - gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1); - gen_op_st_v(s, d_ot, cpu_T1, cpu_A0); + tcg_gen_subi_tl(s->A0, s->T1, size * level); + gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_op_st_v(s, d_ot, s->T1, s->A0); } /* Copy the FrameTemp value to EBP. */ - gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1); + gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1); /* Compute the final value of ESP. */ - tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level); - gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1); + tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level); + gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1); } static void gen_leave(DisasContext *s) @@ -2450,18 +2461,18 @@ static void gen_leave(DisasContext *s) TCGMemOp a_ot = mo_stacksize(s); gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1); - gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, d_ot, s->T0, s->A0); - tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot); + tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot); - gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0); - gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1); + gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0); + gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1); } static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) { gen_update_cc_op(s); - gen_jmp_im(cur_eip); + gen_jmp_im(s, cur_eip); gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno)); s->base.is_jmp = DISAS_NORETURN; } @@ -2498,7 +2509,7 @@ static void gen_interrupt(DisasContext *s, int intno, target_ulong cur_eip, target_ulong next_eip) { gen_update_cc_op(s); - gen_jmp_im(cur_eip); + gen_jmp_im(s, cur_eip); gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno), tcg_const_i32(next_eip - cur_eip)); s->base.is_jmp = DISAS_NORETURN; @@ -2507,7 +2518,7 @@ static void gen_interrupt(DisasContext *s, int intno, static void gen_debug(DisasContext *s, target_ulong cur_eip) { gen_update_cc_op(s); - gen_jmp_im(cur_eip); + gen_jmp_im(s, cur_eip); gen_helper_debug(cpu_env); s->base.is_jmp = DISAS_NORETURN; } @@ -2617,7 +2628,7 @@ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) if (s->jmp_opt) { gen_goto_tb(s, tb_num, eip); } else { - gen_jmp_im(eip); + gen_jmp_im(s, eip); gen_eob(s); } } @@ -2629,60 +2640,60 @@ static void gen_jmp(DisasContext *s, target_ulong eip) static inline void 
gen_ldq_env_A0(DisasContext *s, int offset) { - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset); } static inline void gen_stq_env_A0(DisasContext *s, int offset) { - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); } static inline void gen_ldo_env_A0(DisasContext *s, int offset) { int mem_index = s->mem_index; - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_addi_tl(s->tmp0, s->A0, 8); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); } static inline void gen_sto_env_A0(DisasContext *s, int offset) { int mem_index = s->mem_index; - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ); - tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ); + tcg_gen_addi_tl(s->tmp0, s->A0, 8); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ); } -static inline void gen_op_movo(int d_offset, int s_offset) +static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) { - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1))); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1))); } -static inline void gen_op_movq(int d_offset, int s_offset) +static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset) { - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); } -static inline void gen_op_movl(int d_offset, int s_offset) +static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset) { - tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset); - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset); + tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset); + tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset); } -static inline void 
gen_op_movq_env_0(int d_offset) +static inline void gen_op_movq_env_0(DisasContext *s, int d_offset) { - tcg_gen_movi_i64(cpu_tmp1_i64, 0); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); + tcg_gen_movi_i64(s->tmp1_i64, 0); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); } typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); @@ -3122,41 +3133,42 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0))); } else { - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); - gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); + gen_op_st_v(s, MO_32, s->T0, s->A0); } break; case 0x6e: /* movd mm, ea */ #ifdef TARGET_X86_64 if (s->dflag == MO_64) { gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); - tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); + tcg_gen_st_tl(s->T0, cpu_env, + offsetof(CPUX86State, fpregs[reg].mmx)); } else #endif { gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32); } break; case 0x16e: /* movd xmm, ea */ #ifdef TARGET_X86_64 if (s->dflag == MO_64) { gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[reg])); - gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0); + gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0); } else #endif { gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[reg])); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32); } break; case 0x6f: /* movq mm, ea */ @@ -3165,9 +3177,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); } else { rm = (modrm & 7); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx)); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); } break; @@ -3182,22 +3194,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]), + gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]), offsetof(CPUX86State,xmm_regs[rm])); } break; case 0x210: /* movss xmm, ea */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - tcg_gen_movi_tl(cpu_T0, 0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1))); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); + gen_op_ld_v(s, MO_32, s->T0, s->A0); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); + tcg_gen_movi_tl(s->T0, 0); + tcg_gen_st32_tl(s->T0, cpu_env, 
+ offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1))); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2))); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); } break; @@ -3206,12 +3222,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0))); - tcg_gen_movi_tl(cpu_T0, 0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); + tcg_gen_movi_tl(s->T0, 0); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2))); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } break; @@ -3224,7 +3242,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } else { /* movhlps */ rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1))); } break; @@ -3234,14 +3252,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)), offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2))); } - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)), offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)), offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); break; case 0x312: /* movddup */ @@ -3251,10 +3269,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)), offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); break; case 0x016: /* movhps */ @@ -3266,7 +3284,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } else { /* movlhps */ rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } break; @@ -3276,14 +3294,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)), + gen_op_movl(s, 
offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)), offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)), offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3))); } - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)), offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); break; case 0x178: @@ -3295,14 +3313,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto illegal_op; field_length = x86_ldub_code(env, s) & 0x3F; bit_index = x86_ldub_code(env, s) & 0x3F; - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[reg])); if (b1 == 1) - gen_helper_extrq_i(cpu_env, cpu_ptr0, + gen_helper_extrq_i(cpu_env, s->ptr0, tcg_const_i32(bit_index), tcg_const_i32(field_length)); else - gen_helper_insertq_i(cpu_env, cpu_ptr0, + gen_helper_insertq_i(cpu_env, s->ptr0, tcg_const_i32(bit_index), tcg_const_i32(field_length)); } @@ -3310,13 +3328,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x7e: /* movd ea, mm */ #ifdef TARGET_X86_64 if (s->dflag == MO_64) { - tcg_gen_ld_i64(cpu_T0, cpu_env, + tcg_gen_ld_i64(s->T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1); } else #endif { - tcg_gen_ld32u_tl(cpu_T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0))); gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1); } @@ -3324,13 +3342,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x17e: /* movd ea, xmm */ #ifdef TARGET_X86_64 if (s->dflag == MO_64) { - tcg_gen_ld_i64(cpu_T0, cpu_env, + tcg_gen_ld_i64(s->T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1); } else #endif { - tcg_gen_ld32u_tl(cpu_T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1); } @@ -3342,10 +3360,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1))); + gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); break; case 0x7f: /* movq ea, mm */ if (mod != 3) { @@ -3353,7 +3371,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); } else { rm = (modrm & 7); - gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx), + gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx), offsetof(CPUX86State,fpregs[reg].mmx)); } break; @@ -3368,18 +3386,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]), + gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]), offsetof(CPUX86State,xmm_regs[reg])); } break; case 0x211: /* movss ea, xmm */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(cpu_T0, cpu_env, 
offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); + tcg_gen_ld32u_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); + gen_op_st_v(s, MO_32, s->T0, s->A0); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)), + gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)), offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); } break; @@ -3390,7 +3409,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); } break; @@ -3425,16 +3444,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } val = x86_ldub_code(env, s); if (is_xmm) { - tcg_gen_movi_tl(cpu_T0, val); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0))); - tcg_gen_movi_tl(cpu_T0, 0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1))); + tcg_gen_movi_tl(s->T0, val); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_t0.ZMM_L(0))); + tcg_gen_movi_tl(s->T0, 0); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_t0.ZMM_L(1))); op1_offset = offsetof(CPUX86State,xmm_t0); } else { - tcg_gen_movi_tl(cpu_T0, val); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0))); - tcg_gen_movi_tl(cpu_T0, 0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1))); + tcg_gen_movi_tl(s->T0, val); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, mmx_t0.MMX_L(0))); + tcg_gen_movi_tl(s->T0, 0); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, mmx_t0.MMX_L(1))); op1_offset = offsetof(CPUX86State,mmx_t0); } sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 + @@ -3449,23 +3472,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, rm = (modrm & 7); op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset); - sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset); + sse_fn_epp(cpu_env, s->ptr0, s->ptr1); break; case 0x050: /* movmskps */ rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0); - tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32); + gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0); + tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); break; case 0x150: /* movmskpd */ rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0); - tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32); + gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0); + tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); break; case 0x02a: /* cvtpi2ps */ case 0x12a: /* cvtpi2pd */ @@ -3479,15 +3502,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); 
switch(b >> 8) { case 0x0: - gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1); break; default: case 0x1: - gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1); break; } break; @@ -3496,15 +3519,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); if (ot == MO_32) { SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1]; - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32); } else { #ifdef TARGET_X86_64 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1]; - sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0); + sse_fn_epl(cpu_env, s->ptr0, s->T0); #else goto illegal_op; #endif @@ -3524,20 +3547,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, op2_offset = offsetof(CPUX86State,xmm_regs[rm]); } op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); switch(b) { case 0x02c: - gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1); break; case 0x12c: - gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1); break; case 0x02d: - gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1); break; case 0x12d: - gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1); + gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1); break; } break; @@ -3551,30 +3574,31 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if ((b >> 8) & 1) { gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0))); } else { - gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0); - tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0))); + gen_op_ld_v(s, MO_32, s->T0, s->A0); + tcg_gen_st32_tl(s->T0, cpu_env, + offsetof(CPUX86State, xmm_t0.ZMM_L(0))); } op2_offset = offsetof(CPUX86State,xmm_t0); } else { rm = (modrm & 7) | REX_B(s); op2_offset = offsetof(CPUX86State,xmm_regs[rm]); } - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); if (ot == MO_32) { SSEFunc_i_ep sse_fn_i_ep = sse_op_table3bi[((b >> 7) & 2) | (b & 1)]; - sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0); - tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32); + sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); } else { #ifdef TARGET_X86_64 SSEFunc_l_ep sse_fn_l_ep = sse_op_table3bq[((b >> 7) & 2) | (b & 1)]; - sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0); + sse_fn_l_ep(s->T0, cpu_env, s->ptr0); #else goto illegal_op; #endif } - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; case 0xc4: /* pinsrw */ case 0x1c4: @@ -3583,11 +3607,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val = x86_ldub_code(env, s); if (b1) { val &= 7; - tcg_gen_st16_tl(cpu_T0, cpu_env, + tcg_gen_st16_tl(s->T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val))); } else { val &= 3; - tcg_gen_st16_tl(cpu_T0, cpu_env, + tcg_gen_st16_tl(s->T0, cpu_env, 
offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val))); } break; @@ -3600,16 +3624,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (b1) { val &= 7; rm = (modrm & 7) | REX_B(s); - tcg_gen_ld16u_tl(cpu_T0, cpu_env, + tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val))); } else { val &= 3; rm = (modrm & 7); - tcg_gen_ld16u_tl(cpu_T0, cpu_env, + tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val))); } reg = ((modrm >> 3) & 7) | rex_r; - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; case 0x1d6: /* movq ea, xmm */ if (mod != 3) { @@ -3618,22 +3642,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)), offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1))); + gen_op_movq_env_0(s, + offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1))); } break; case 0x2d6: /* movq2dq */ gen_helper_enter_mmx(cpu_env); rm = (modrm & 7); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,fpregs[rm].mmx)); - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1))); + gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); break; case 0x3d6: /* movdq2q */ gen_helper_enter_mmx(cpu_env); rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx), + gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); break; case 0xd7: /* pmovmskb */ @@ -3642,15 +3667,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto illegal_op; if (b1) { rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0); + tcg_gen_addi_ptr(s->ptr0, cpu_env, + offsetof(CPUX86State, xmm_regs[rm])); + gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0); } else { rm = (modrm & 7); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx)); - gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0); + tcg_gen_addi_ptr(s->ptr0, cpu_env, + offsetof(CPUX86State, fpregs[rm].mmx)); + gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0); } reg = ((modrm >> 3) & 7) | rex_r; - tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); break; case 0x138: @@ -3690,15 +3717,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */ case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */ - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset + + tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset + offsetof(ZMMReg, ZMM_L(0))); break; case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */ - tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0, + tcg_gen_qemu_ld_tl(s->tmp0, s->A0, s->mem_index, MO_LEUW); - tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset + + tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset + offsetof(ZMMReg, ZMM_W(0))); break; case 0x2a: /* movntqda */ @@ -3722,9 +3749,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto unknown_op; } - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - 
tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + sse_fn_epp(cpu_env, s->ptr0, s->ptr1); if (b == 0x17) { set_cc_op(s, CC_OP_EFLAGS); @@ -3754,13 +3781,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, ot = MO_64; } - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_crc32(cpu_T0, cpu_tmp2_i32, - cpu_T0, tcg_const_i32(8 << ot)); + gen_helper_crc32(s->T0, s->tmp2_i32, + s->T0, tcg_const_i32(8 << ot)); ot = mo_64_32(s->dflag); - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; case 0x1f0: /* crc32 or movbe */ @@ -3785,11 +3812,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_lea_modrm(env, s, modrm); if ((b & 1) == 0) { - tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0, + tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); } else { - tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0, + tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0, s->mem_index, ot | MO_BE); } break; @@ -3802,9 +3829,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]); - gen_op_mov_reg_v(ot, reg, cpu_T0); - gen_op_update1_cc(); + tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]); + gen_op_mov_reg_v(s, ot, reg, s->T0); + gen_op_update1_cc(s); set_cc_op(s, CC_OP_LOGICB + ot); break; @@ -3821,28 +3848,28 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); /* Extract START, and shift the operand. Shifts larger than operand size get zeros. */ - tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]); - tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0); + tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]); + tcg_gen_shr_tl(s->T0, s->T0, s->A0); bound = tcg_const_tl(ot == MO_64 ? 63 : 31); zero = tcg_const_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound, - cpu_T0, zero); + tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, + s->T0, zero); tcg_temp_free(zero); /* Extract the LEN into a mask. Lengths larger than operand size get all ones. */ - tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8); - tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound, - cpu_A0, bound); + tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8); + tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, + s->A0, bound); tcg_temp_free(bound); - tcg_gen_movi_tl(cpu_T1, 1); - tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0); - tcg_gen_subi_tl(cpu_T1, cpu_T1, 1); - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_movi_tl(s->T1, 1); + tcg_gen_shl_tl(s->T1, s->T1, s->A0); + tcg_gen_subi_tl(s->T1, s->T1, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(ot, reg, cpu_T0); - gen_op_update1_cc(); + gen_op_mov_reg_v(s, ot, reg, s->T0); + gen_op_update1_cc(s); set_cc_op(s, CC_OP_LOGICB + ot); } break; @@ -3855,22 +3882,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]); + tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); { TCGv bound = tcg_const_tl(ot == MO_64 ? 
63 : 31); /* Note that since we're using BMILG (in order to get O cleared) we need to store the inverse into C. */ tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, - cpu_T1, bound); - tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1, - bound, bound, cpu_T1); + s->T1, bound); + tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, + bound, bound, s->T1); tcg_temp_free(bound); } - tcg_gen_movi_tl(cpu_A0, -1); - tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1); - tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0); - gen_op_mov_reg_v(ot, reg, cpu_T0); - gen_op_update1_cc(); + tcg_gen_movi_tl(s->A0, -1); + tcg_gen_shl_tl(s->A0, s->A0, s->T1); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); + gen_op_mov_reg_v(s, ot, reg, s->T0); + gen_op_update1_cc(s); set_cc_op(s, CC_OP_BMILGB + ot); break; @@ -3884,19 +3911,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); switch (ot) { default: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]); - tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32, - cpu_tmp2_i32, cpu_tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]); + tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); + tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32); break; #ifdef TARGET_X86_64 case MO_64: - tcg_gen_mulu2_i64(cpu_T0, cpu_T1, - cpu_T0, cpu_regs[R_EDX]); - tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0); - tcg_gen_mov_i64(cpu_regs[reg], cpu_T1); + tcg_gen_mulu2_i64(s->T0, s->T1, + s->T0, cpu_regs[R_EDX]); + tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0); + tcg_gen_mov_i64(cpu_regs[reg], s->T1); break; #endif } @@ -3913,11 +3940,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* Note that by zero-extending the mask operand, we automatically handle zero-extending the result. */ if (ot == MO_64) { - tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]); + tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { - tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]); + tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1); + gen_helper_pdep(cpu_regs[reg], s->T0, s->T1); break; case 0x2f5: /* pext Gy, By, Ey */ @@ -3931,11 +3958,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* Note that by zero-extending the mask operand, we automatically handle zero-extending the result. */ if (ot == MO_64) { - tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]); + tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { - tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]); + tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1); + gen_helper_pext(cpu_regs[reg], s->T0, s->T1); break; case 0x1f6: /* adcx Gy, Ey */ @@ -3982,7 +4009,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { gen_compute_eflags(s); } - carry_in = cpu_tmp0; + carry_in = s->tmp0; tcg_gen_extract_tl(carry_in, cpu_cc_src, ctz32(b == 0x1f6 ? CC_C : CC_O), 1); } @@ -3993,22 +4020,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* If we know TL is 64-bit, and we want a 32-bit result, just do everything in 64-bit arithmetic. 
*/ tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]); - tcg_gen_ext32u_i64(cpu_T0, cpu_T0); - tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]); - tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in); - tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0); - tcg_gen_shri_i64(carry_out, cpu_T0, 32); + tcg_gen_ext32u_i64(s->T0, s->T0); + tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]); + tcg_gen_add_i64(s->T0, s->T0, carry_in); + tcg_gen_ext32u_i64(cpu_regs[reg], s->T0); + tcg_gen_shri_i64(carry_out, s->T0, 32); break; #endif default: /* Otherwise compute the carry-out in two steps. */ zero = tcg_const_tl(0); - tcg_gen_add2_tl(cpu_T0, carry_out, - cpu_T0, zero, + tcg_gen_add2_tl(s->T0, carry_out, + s->T0, zero, carry_in, zero); tcg_gen_add2_tl(cpu_regs[reg], carry_out, cpu_regs[reg], carry_out, - cpu_T0, zero); + s->T0, zero); tcg_temp_free(zero); break; } @@ -4027,24 +4054,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); if (ot == MO_64) { - tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63); + tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63); } else { - tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31); + tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31); } if (b == 0x1f7) { - tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_shl_tl(s->T0, s->T0, s->T1); } else if (b == 0x2f7) { if (ot != MO_64) { - tcg_gen_ext32s_tl(cpu_T0, cpu_T0); + tcg_gen_ext32s_tl(s->T0, s->T0); } - tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_sar_tl(s->T0, s->T0, s->T1); } else { if (ot != MO_64) { - tcg_gen_ext32u_tl(cpu_T0, cpu_T0); + tcg_gen_ext32u_tl(s->T0, s->T0); } - tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_shr_tl(s->T0, s->T0, s->T1); } - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; case 0x0f3: @@ -4059,25 +4086,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_mov_tl(cpu_cc_src, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, s->T0); switch (reg & 7) { case 1: /* blsr By,Ey */ - tcg_gen_subi_tl(cpu_T1, cpu_T0, 1); - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); break; case 2: /* blsmsk By,Ey */ - tcg_gen_subi_tl(cpu_T1, cpu_T0, 1); - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_xor_tl(s->T0, s->T0, s->T1); break; case 3: /* blsi By, Ey */ - tcg_gen_neg_tl(cpu_T1, cpu_T0); - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_neg_tl(s->T1, s->T0); + tcg_gen_and_tl(s->T0, s->T0, s->T1); break; default: goto unknown_op; } - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - gen_op_mov_reg_v(ot, s->vex_v, cpu_T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + gen_op_mov_reg_v(s, ot, s->vex_v, s->T0); set_cc_op(s, CC_OP_BMILGB + ot); break; @@ -4115,45 +4142,45 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val = x86_ldub_code(env, s); switch (b) { case 0x14: /* pextrb */ - tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, + tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_B(val & 15))); if (mod == 3) { - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { - tcg_gen_qemu_st_tl(cpu_T0, cpu_A0, + tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, MO_UB); } break; case 0x15: /* pextrw */ - tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, + tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_W(val & 7))); if (mod == 3) { - gen_op_mov_reg_v(ot, 
rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { - tcg_gen_qemu_st_tl(cpu_T0, cpu_A0, + tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, MO_LEUW); } break; case 0x16: if (ot == MO_32) { /* pextrd */ - tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + tcg_gen_ld_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(val & 3))); if (mod == 3) { - tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32); } else { - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); } } else { /* pextrq */ #ifdef TARGET_X86_64 - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(val & 1))); if (mod == 3) { - tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64); + tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64); } else { - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); } #else @@ -4162,35 +4189,35 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; case 0x17: /* extractps */ - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(val & 3))); if (mod == 3) { - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { - tcg_gen_qemu_st_tl(cpu_T0, cpu_A0, + tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, MO_LEUL); } break; case 0x20: /* pinsrb */ if (mod == 3) { - gen_op_mov_v_reg(MO_32, cpu_T0, rm); + gen_op_mov_v_reg(s, MO_32, s->T0, rm); } else { - tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0, + tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, MO_UB); } - tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State, + tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_B(val & 15))); break; case 0x21: /* insertps */ if (mod == 3) { - tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + tcg_gen_ld_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State,xmm_regs[rm] .ZMM_L((val >> 6) & 3))); } else { - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); } - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State,xmm_regs[reg] .ZMM_L((val >> 4) & 3))); if ((val >> 0) & 1) @@ -4213,23 +4240,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x22: if (ot == MO_32) { /* pinsrd */ if (mod == 3) { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]); } else { - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); } - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(val & 3))); } else { /* pinsrq */ #ifdef TARGET_X86_64 if (mod == 3) { - gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm); + gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm); } else { - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); } - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(val & 1))); #else @@ -4271,9 +4298,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } } - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); - sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, 
op2_offset); + sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val)); break; case 0x33a: @@ -4293,13 +4320,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); b = x86_ldub_code(env, s); if (ot == MO_64) { - tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63); + tcg_gen_rotri_tl(s->T0, s->T0, b & 63); } else { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31); - tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); } - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; default: @@ -4356,8 +4383,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, switch (sz) { case 2: /* 32 bit access */ - gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0); - tcg_gen_st32_tl(cpu_T0, cpu_env, + gen_op_ld_v(s, MO_32, s->T0, s->A0); + tcg_gen_st32_tl(s->T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0))); break; case 3: @@ -4394,18 +4421,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { goto illegal_op; } - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + sse_fn_epp(cpu_env, s->ptr0, s->ptr1); break; case 0x70: /* pshufx insn */ case 0xc6: /* pshufx insn */ val = x86_ldub_code(env, s); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); /* XXX: introduce a new table? */ sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp; - sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); + sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val)); break; case 0xc2: /* compare insns */ @@ -4414,28 +4441,28 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto unknown_op; sse_fn_epp = sse_op_table4[val][b1]; - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + sse_fn_epp(cpu_env, s->ptr0, s->ptr1); break; case 0xf7: /* maskmov : we must prepare A0 */ if (mod != 3) goto illegal_op; - tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]); - gen_extu(s->aflag, cpu_A0); + tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); + gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); /* XXX: introduce a new table? 
*/ sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp; - sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0); + sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0); break; default: - tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + sse_fn_epp(cpu_env, s->ptr0, s->ptr1); break; } if (b == 0x2e || b == 0x2f) { @@ -4462,7 +4489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #ifdef TARGET_X86_64 s->rex_x = 0; s->rex_b = 0; - x86_64_hregs = 0; + s->x86_64_hregs = false; #endif s->rip_offset = 0; /* for relative ip address */ s->vex_l = 0; @@ -4521,7 +4548,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) rex_r = (b & 0x4) << 1; s->rex_x = (b & 0x2) << 2; REX_B(s) = (b & 0x1) << 3; - x86_64_hregs = 1; /* select uniform byte register addressing */ + /* select uniform byte register addressing */ + s->x86_64_hregs = true; goto next_byte; } break; @@ -4549,7 +4577,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } #ifdef TARGET_X86_64 - if (x86_64_hregs) { + if (s->x86_64_hregs) { goto illegal_op; } #endif @@ -4653,13 +4681,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) xor_zero: /* xor reg, reg optimisation */ set_cc_op(s, CC_OP_CLR); - tcg_gen_movi_tl(cpu_T0, 0); - gen_op_mov_reg_v(ot, reg, cpu_T0); + tcg_gen_movi_tl(s->T0, 0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; } else { opreg = rm; } - gen_op_mov_v_reg(ot, cpu_T1, reg); + gen_op_mov_v_reg(s, ot, s->T1, reg); gen_op(s, op, ot, opreg); break; case 1: /* OP Gv, Ev */ @@ -4669,17 +4697,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) rm = (modrm & 7) | REX_B(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); } else if (op == OP_XORL && rm == reg) { goto xor_zero; } else { - gen_op_mov_v_reg(ot, cpu_T1, rm); + gen_op_mov_v_reg(s, ot, s->T1, rm); } gen_op(s, op, ot, reg); break; case 2: /* OP A, Iv */ val = insn_get(env, s, ot); - tcg_gen_movi_tl(cpu_T1, val); + tcg_gen_movi_tl(s->T1, val); gen_op(s, op, ot, OR_EAX); break; } @@ -4725,7 +4753,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) val = (int8_t)insn_get(env, s, MO_8); break; } - tcg_gen_movi_tl(cpu_T1, val); + tcg_gen_movi_tl(s->T1, val); gen_op(s, op, ot, opreg); } break; @@ -4756,17 +4784,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* For those below that handle locked memory, don't load here. 
*/ if (!(s->prefix & PREFIX_LOCK) || op != 2) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); } switch(op) { case 0: /* test */ val = insn_get(env, s, ot); - tcg_gen_movi_tl(cpu_T1, val); - gen_op_testl_T0_T1_cc(); + tcg_gen_movi_tl(s->T1, val); + gen_op_testl_T0_T1_cc(s); set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ @@ -4774,15 +4802,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod == 3) { goto illegal_op; } - tcg_gen_movi_tl(cpu_T0, ~0); - tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + tcg_gen_movi_tl(s->T0, ~0); + tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - tcg_gen_not_tl(cpu_T0, cpu_T0); + tcg_gen_not_tl(s->T0, s->T0); if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_st_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } } break; @@ -4798,8 +4826,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) t0 = tcg_temp_local_new(); label1 = gen_new_label(); - tcg_gen_mov_tl(a0, cpu_A0); - tcg_gen_mov_tl(t0, cpu_T0); + tcg_gen_mov_tl(a0, s->A0); + tcg_gen_mov_tl(t0, s->T0); gen_set_label(label1); t1 = tcg_temp_new(); @@ -4813,53 +4841,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_temp_free(t2); tcg_temp_free(a0); - tcg_gen_mov_tl(cpu_T0, t0); + tcg_gen_mov_tl(s->T0, t0); tcg_temp_free(t0); } else { - tcg_gen_neg_tl(cpu_T0, cpu_T0); + tcg_gen_neg_tl(s->T0, s->T0); if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_st_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } } - gen_op_update_neg_cc(); + gen_op_update_neg_cc(s); set_cc_op(s, CC_OP_SUBB + ot); break; case 4: /* mul */ switch(ot) { case MO_8: - gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX); - tcg_gen_ext8u_tl(cpu_T0, cpu_T0); - tcg_gen_ext8u_tl(cpu_T1, cpu_T1); + gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX); + tcg_gen_ext8u_tl(s->T0, s->T0); + tcg_gen_ext8u_tl(s->T1, s->T1); /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00); set_cc_op(s, CC_OP_MULB); break; case MO_16: - gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX); - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); - tcg_gen_ext16u_tl(cpu_T1, cpu_T1); + gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX); + tcg_gen_ext16u_tl(s->T0, s->T0); + tcg_gen_ext16u_tl(s->T1, s->T1); /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_shri_tl(cpu_T0, cpu_T0, 16); - gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0); - tcg_gen_mov_tl(cpu_cc_src, cpu_T0); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_shri_tl(s->T0, s->T0, 16); + gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); + tcg_gen_mov_tl(cpu_cc_src, s->T0); set_cc_op(s, CC_OP_MULW); break; default: case MO_32: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]); - tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32, - cpu_tmp2_i32, cpu_tmp3_i32); - 
tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]); + tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32); + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); set_cc_op(s, CC_OP_MULL); @@ -4867,7 +4895,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #ifdef TARGET_X86_64 case MO_64: tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], - cpu_T0, cpu_regs[R_EAX]); + s->T0, cpu_regs[R_EAX]); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); set_cc_op(s, CC_OP_MULQ); @@ -4878,49 +4906,49 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 5: /* imul */ switch(ot) { case MO_8: - gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX); - tcg_gen_ext8s_tl(cpu_T0, cpu_T0); - tcg_gen_ext8s_tl(cpu_T1, cpu_T1); + gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX); + tcg_gen_ext8s_tl(s->T0, s->T0); + tcg_gen_ext8s_tl(s->T1, s->T1); /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0); - tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_ext8s_tl(s->tmp0, s->T0); + tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); set_cc_op(s, CC_OP_MULB); break; case MO_16: - gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX); - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - tcg_gen_ext16s_tl(cpu_T1, cpu_T1); + gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX); + tcg_gen_ext16s_tl(s->T0, s->T0); + tcg_gen_ext16s_tl(s->T1, s->T1); /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0); - tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0); - tcg_gen_shri_tl(cpu_T0, cpu_T0, 16); - gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_ext16s_tl(s->tmp0, s->T0); + tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); + tcg_gen_shri_tl(s->T0, s->T0, 16); + gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); set_cc_op(s, CC_OP_MULW); break; default: case MO_32: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]); - tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32, - cpu_tmp2_i32, cpu_tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32); - tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]); + tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32); + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32); + tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); - tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); - tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); + tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32); set_cc_op(s, 
CC_OP_MULL); break; #ifdef TARGET_X86_64 case MO_64: tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], - cpu_T0, cpu_regs[R_EAX]); + s->T0, cpu_regs[R_EAX]); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63); tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]); @@ -4932,18 +4960,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 6: /* div */ switch(ot) { case MO_8: - gen_helper_divb_AL(cpu_env, cpu_T0); + gen_helper_divb_AL(cpu_env, s->T0); break; case MO_16: - gen_helper_divw_AX(cpu_env, cpu_T0); + gen_helper_divw_AX(cpu_env, s->T0); break; default: case MO_32: - gen_helper_divl_EAX(cpu_env, cpu_T0); + gen_helper_divl_EAX(cpu_env, s->T0); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_divq_EAX(cpu_env, cpu_T0); + gen_helper_divq_EAX(cpu_env, s->T0); break; #endif } @@ -4951,18 +4979,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 7: /* idiv */ switch(ot) { case MO_8: - gen_helper_idivb_AL(cpu_env, cpu_T0); + gen_helper_idivb_AL(cpu_env, s->T0); break; case MO_16: - gen_helper_idivw_AX(cpu_env, cpu_T0); + gen_helper_idivw_AX(cpu_env, s->T0); break; default: case MO_32: - gen_helper_idivl_EAX(cpu_env, cpu_T0); + gen_helper_idivl_EAX(cpu_env, s->T0); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_idivq_EAX(cpu_env, cpu_T0); + gen_helper_idivq_EAX(cpu_env, s->T0); break; #endif } @@ -4997,9 +5025,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod != 3) { gen_lea_modrm(env, s, modrm); if (op >= 2 && op != 3 && op != 5) - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); } switch(op) { @@ -5020,60 +5048,60 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 2: /* call Ev */ /* XXX: optimize if memory (no 'and' is necessary) */ if (dflag == MO_16) { - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); + tcg_gen_ext16u_tl(s->T0, s->T0); } next_eip = s->pc - s->cs_base; - tcg_gen_movi_tl(cpu_T1, next_eip); - gen_push_v(s, cpu_T1); - gen_op_jmp_v(cpu_T0); + tcg_gen_movi_tl(s->T1, next_eip); + gen_push_v(s, s->T1); + gen_op_jmp_v(s->T0); gen_bnd_jmp(s); - gen_jr(s, cpu_T0); + gen_jr(s, s->T0); break; case 3: /* lcall Ev */ - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); gen_add_A0_im(s, 1 << ot); - gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0); + gen_op_ld_v(s, MO_16, s->T0, s->A0); do_lcall: if (s->pe && !s->vm86) { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1, + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1, tcg_const_i32(dflag - 1), tcg_const_tl(s->pc - s->cs_base)); } else { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1, + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } - tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, cpu_tmp4); + tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, s->tmp4); break; case 4: /* jmp Ev */ if (dflag == MO_16) { - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); + tcg_gen_ext16u_tl(s->T0, s->T0); } - gen_op_jmp_v(cpu_T0); + gen_op_jmp_v(s->T0); gen_bnd_jmp(s); - gen_jr(s, cpu_T0); + gen_jr(s, s->T0); break; case 5: /* ljmp Ev */ - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); 
gen_add_A0_im(s, 1 << ot); - gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0); + gen_op_ld_v(s, MO_16, s->T0, s->A0); do_ljmp: if (s->pe && !s->vm86) { - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1, + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1, tcg_const_tl(s->pc - s->cs_base)); } else { - gen_op_movl_seg_T0_vm(R_CS); - gen_op_jmp_v(cpu_T1); + gen_op_movl_seg_T0_vm(s, R_CS); + gen_op_jmp_v(s->T1); } - tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, cpu_tmp4); + tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, s->tmp4); break; case 6: /* push Ev */ - gen_push_v(s, cpu_T0); + gen_push_v(s, s->T0); break; default: goto unknown_op; @@ -5088,8 +5116,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_op_mov_v_reg(ot, cpu_T1, reg); - gen_op_testl_T0_T1_cc(); + gen_op_mov_v_reg(s, ot, s->T1, reg); + gen_op_testl_T0_T1_cc(s); set_cc_op(s, CC_OP_LOGICB + ot); break; @@ -5098,9 +5126,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = mo_b_d(b, dflag); val = insn_get(env, s, ot); - gen_op_mov_v_reg(ot, cpu_T0, OR_EAX); - tcg_gen_movi_tl(cpu_T1, val); - gen_op_testl_T0_T1_cc(); + gen_op_mov_v_reg(s, ot, s->T0, OR_EAX); + tcg_gen_movi_tl(s->T1, val); + gen_op_testl_T0_T1_cc(s); set_cc_op(s, CC_OP_LOGICB + ot); break; @@ -5108,20 +5136,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch (dflag) { #ifdef TARGET_X86_64 case MO_64: - gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX); - tcg_gen_ext32s_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); + tcg_gen_ext32s_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0); break; #endif case MO_32: - gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX); - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0); + gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX); + tcg_gen_ext16s_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0); break; case MO_16: - gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX); - tcg_gen_ext8s_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); + gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX); + tcg_gen_ext8s_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); break; default: tcg_abort(); @@ -5131,22 +5159,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch (dflag) { #ifdef TARGET_X86_64 case MO_64: - gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX); - tcg_gen_sari_tl(cpu_T0, cpu_T0, 63); - gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0); + gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX); + tcg_gen_sari_tl(s->T0, s->T0, 63); + gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0); break; #endif case MO_32: - gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX); - tcg_gen_ext32s_tl(cpu_T0, cpu_T0); - tcg_gen_sari_tl(cpu_T0, cpu_T0, 31); - gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); + tcg_gen_ext32s_tl(s->T0, s->T0); + tcg_gen_sari_tl(s->T0, s->T0, 31); + gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0); break; case MO_16: - gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX); - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - tcg_gen_sari_tl(cpu_T0, cpu_T0, 15); - gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0); + gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX); + tcg_gen_ext16s_tl(s->T0, s->T0); + tcg_gen_sari_tl(s->T0, s->T0, 15); + gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); break; default: tcg_abort(); @@ -5165,42 
+5193,42 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); if (b == 0x69) { val = insn_get(env, s, ot); - tcg_gen_movi_tl(cpu_T1, val); + tcg_gen_movi_tl(s->T1, val); } else if (b == 0x6b) { val = (int8_t)insn_get(env, s, MO_8); - tcg_gen_movi_tl(cpu_T1, val); + tcg_gen_movi_tl(s->T1, val); } else { - gen_op_mov_v_reg(ot, cpu_T1, reg); + gen_op_mov_v_reg(s, ot, s->T1, reg); } switch (ot) { #ifdef TARGET_X86_64 case MO_64: - tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1); + tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63); - tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1); + tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1); break; #endif case MO_32: - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1); - tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32, - cpu_tmp2_i32, cpu_tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32); - tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); + tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31); tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); - tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); - tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); + tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32); break; default: - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - tcg_gen_ext16s_tl(cpu_T1, cpu_T1); + tcg_gen_ext16s_tl(s->T0, s->T0); + tcg_gen_ext16s_tl(s->T1, s->T1); /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); - tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0); - tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0); - gen_op_mov_reg_v(ot, reg, cpu_T0); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + tcg_gen_ext16s_tl(s->tmp0, s->T0); + tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; } set_cc_op(s, CC_OP_MULB + ot); @@ -5211,27 +5239,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - gen_op_mov_v_reg(ot, cpu_T0, reg); + gen_op_mov_v_reg(s, ot, s->T0, reg); if (mod == 3) { rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, cpu_T1, rm); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_mov_reg_v(ot, reg, cpu_T1); - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_v_reg(s, ot, s->T1, rm); + tcg_gen_add_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, ot, reg, s->T1); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { gen_lea_modrm(env, s, modrm); if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0, + tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0, s->mem_index, ot | MO_LE); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_add_tl(s->T0, s->T0, s->T1); } else { - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); + tcg_gen_add_tl(s->T0, s->T0, s->T1); + gen_op_st_v(s, ot, s->T0, s->A0); } - gen_op_mov_reg_v(ot, reg, cpu_T1); + gen_op_mov_reg_v(s, ot, reg, s->T1); } - gen_op_update2_cc(); + gen_op_update2_cc(s); set_cc_op(s, CC_OP_ADDB + ot); break; 
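
The imul cases above all compute CF/OF with one idiom: the translator keeps the full signed product in cc_dst and stores "product minus the sign-extended low half" in cc_src, and the flag computation for the CC_OP_MUL* modes derives CF and OF from whether cc_src is non-zero. A small standalone C sketch of that check for the 16-bit path follows; it is an illustration only, not part of the patch: the function name and demo values are made up, and the mask-and-subtract sign extension merely stands in for tcg_gen_ext16s_tl.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Sketch of the overflow test behind the CC_OP_MULW setup above:
     *   cc_dst = full 16x16 signed product
     *   cc_src = product - sign_extend16(product)
     * CF/OF end up set iff cc_src != 0, i.e. iff the product does not
     * fit in a signed 16-bit destination.  (Hypothetical helper name.)
     */
    static int imul16_sets_cf_of(int16_t a, int16_t b)
    {
        int32_t product  = (int32_t)a * (int32_t)b;      /* cc_dst */
        int32_t low      = product & 0xffff;
        int32_t low_sext = (low ^ 0x8000) - 0x8000;      /* portable ext16s */
        return (product - low_sext) != 0;                /* cc_src != 0 */
    }

    int main(void)
    {
        printf("%d\n", imul16_sets_cf_of(300, 200));  /* 60000: overflows -> 1 */
        printf("%d\n", imul16_sets_cf_of(100, 100));  /* 10000: fits      -> 0 */
        return 0;
    }
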
case 0x1b0: @@ -5246,7 +5274,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) oldv = tcg_temp_new(); newv = tcg_temp_new(); cmpv = tcg_temp_new(); - gen_op_mov_v_reg(ot, newv, reg); + gen_op_mov_v_reg(s, ot, newv, reg); tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); if (s->prefix & PREFIX_LOCK) { @@ -5254,16 +5282,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_lea_modrm(env, s, modrm); - tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv, + tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv, s->mem_index, ot | MO_LE); - gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_mov_reg_v(s, ot, R_EAX, oldv); } else { if (mod == 3) { rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, oldv, rm); + gen_op_mov_v_reg(s, ot, oldv, rm); } else { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, oldv, cpu_A0); + gen_op_ld_v(s, ot, oldv, s->A0); rm = 0; /* avoid warning */ } gen_extu(ot, oldv); @@ -5271,19 +5299,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* store value = (old == cmp ? new : old); */ tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); if (mod == 3) { - gen_op_mov_reg_v(ot, R_EAX, oldv); - gen_op_mov_reg_v(ot, rm, newv); + gen_op_mov_reg_v(s, ot, R_EAX, oldv); + gen_op_mov_reg_v(s, ot, rm, newv); } else { /* Perform an unconditional store cycle like physical cpu; must be before changing accumulator to ensure idempotency if the store faults and the instruction is restarted */ - gen_op_st_v(s, ot, newv, cpu_A0); - gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_st_v(s, ot, newv, s->A0); + gen_op_mov_reg_v(s, ot, R_EAX, oldv); } } tcg_gen_mov_tl(cpu_cc_src, oldv); - tcg_gen_mov_tl(cpu_cc_srcT, cmpv); + tcg_gen_mov_tl(s->cc_srcT, cmpv); tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); tcg_temp_free(oldv); @@ -5302,9 +5330,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; gen_lea_modrm(env, s, modrm); if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg16b(cpu_env, cpu_A0); + gen_helper_cmpxchg16b(cpu_env, s->A0); } else { - gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0); + gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0); } } else #endif @@ -5313,9 +5341,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; gen_lea_modrm(env, s, modrm); if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg8b(cpu_env, cpu_A0); + gen_helper_cmpxchg8b(cpu_env, s->A0); } else { - gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0); + gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0); } } set_cc_op(s, CC_OP_EFLAGS); @@ -5324,14 +5352,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /**************************/ /* push/pop */ case 0x50 ... 0x57: /* push */ - gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s)); - gen_push_v(s, cpu_T0); + gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s)); + gen_push_v(s, s->T0); break; case 0x58 ... 
0x5f: /* pop */ ot = gen_pop_T0(s); /* NOTE: order is important for pop %sp */ gen_pop_update(s, ot); - gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0); + gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0); break; case 0x60: /* pusha */ if (CODE64(s)) @@ -5350,8 +5378,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) val = insn_get(env, s, ot); else val = (int8_t)insn_get(env, s, MO_8); - tcg_gen_movi_tl(cpu_T0, val); - gen_push_v(s, cpu_T0); + tcg_gen_movi_tl(s->T0, val); + gen_push_v(s, s->T0); break; case 0x8f: /* pop Ev */ modrm = x86_ldub_code(env, s); @@ -5361,7 +5389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* NOTE: order is important for pop %sp */ gen_pop_update(s, ot); rm = (modrm & 7) | REX_B(s); - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { /* NOTE: order is important too for MMU exceptions */ s->popl_esp_hack = 1 << ot; @@ -5387,13 +5415,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1e: /* push ds */ if (CODE64(s)) goto illegal_op; - gen_op_movl_T0_seg(b >> 3); - gen_push_v(s, cpu_T0); + gen_op_movl_T0_seg(s, b >> 3); + gen_push_v(s, s->T0); break; case 0x1a0: /* push fs */ case 0x1a8: /* push gs */ - gen_op_movl_T0_seg((b >> 3) & 7); - gen_push_v(s, cpu_T0); + gen_op_movl_T0_seg(s, (b >> 3) & 7); + gen_push_v(s, s->T0); break; case 0x07: /* pop es */ case 0x17: /* pop ss */ @@ -5406,7 +5434,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_pop_update(s, ot); /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->base.is_jmp) { - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); if (reg == R_SS) { s->tf = 0; gen_eob_inhibit_irq(s, true); @@ -5421,7 +5449,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_movl_seg_T0(s, (b >> 3) & 7); gen_pop_update(s, ot); if (s->base.is_jmp) { - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } break; @@ -5447,11 +5475,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); } val = insn_get(env, s, ot); - tcg_gen_movi_tl(cpu_T0, val); + tcg_gen_movi_tl(s->T0, val); if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_st_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0); + gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0); } break; case 0x8a: @@ -5461,7 +5489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; case 0x8e: /* mov seg, Gv */ modrm = x86_ldub_code(env, s); @@ -5472,7 +5500,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_movl_seg_T0(s, reg); /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->base.is_jmp) { - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); if (reg == R_SS) { s->tf = 0; gen_eob_inhibit_irq(s, true); @@ -5487,7 +5515,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) mod = (modrm >> 6) & 3; if (reg >= 6) goto illegal_op; - gen_op_movl_T0_seg(reg); + gen_op_movl_T0_seg(s, reg); ot = mod == 3 ? 
dflag : MO_16; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; @@ -5513,31 +5541,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) rm = (modrm & 7) | REX_B(s); if (mod == 3) { - if (s_ot == MO_SB && byte_reg_is_xH(rm)) { - tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8); + if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) { + tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8); } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); switch (s_ot) { case MO_UB: - tcg_gen_ext8u_tl(cpu_T0, cpu_T0); + tcg_gen_ext8u_tl(s->T0, s->T0); break; case MO_SB: - tcg_gen_ext8s_tl(cpu_T0, cpu_T0); + tcg_gen_ext8s_tl(s->T0, s->T0); break; case MO_UW: - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); + tcg_gen_ext16u_tl(s->T0, s->T0); break; default: case MO_SW: - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); + tcg_gen_ext16s_tl(s->T0, s->T0); break; } } - gen_op_mov_reg_v(d_ot, reg, cpu_T0); + gen_op_mov_reg_v(s, d_ot, reg, s->T0); } else { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0); - gen_op_mov_reg_v(d_ot, reg, cpu_T0); + gen_op_ld_v(s, s_ot, s->T0, s->A0); + gen_op_mov_reg_v(s, d_ot, reg, s->T0); } } break; @@ -5550,9 +5578,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = ((modrm >> 3) & 7) | rex_r; { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(a); + TCGv ea = gen_lea_modrm_1(s, a); gen_lea_v_seg(s, s->aflag, ea, -1, -1); - gen_op_mov_reg_v(dflag, reg, cpu_A0); + gen_op_mov_reg_v(s, dflag, reg, s->A0); } break; @@ -5574,30 +5602,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) offset_addr = insn_get(env, s, s->aflag); break; } - tcg_gen_movi_tl(cpu_A0, offset_addr); + tcg_gen_movi_tl(s->A0, offset_addr); gen_add_A0_ds_seg(s); if ((b & 2) == 0) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); - gen_op_mov_reg_v(ot, R_EAX, cpu_T0); + gen_op_ld_v(s, ot, s->T0, s->A0); + gen_op_mov_reg_v(s, ot, R_EAX, s->T0); } else { - gen_op_mov_v_reg(ot, cpu_T0, R_EAX); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_mov_v_reg(s, ot, s->T0, R_EAX); + gen_op_st_v(s, ot, s->T0, s->A0); } } break; case 0xd7: /* xlat */ - tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]); - tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]); - tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0); - gen_extu(s->aflag, cpu_A0); + tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]); + tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]); + tcg_gen_add_tl(s->A0, s->A0, s->T0); + gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); - gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0); - gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0); + gen_op_ld_v(s, MO_8, s->T0, s->A0); + gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); break; case 0xb0 ... 0xb7: /* mov R, Ib */ val = insn_get(env, s, MO_8); - tcg_gen_movi_tl(cpu_T0, val); - gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0); + tcg_gen_movi_tl(s->T0, val); + gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0); break; case 0xb8 ... 
0xbf: /* mov R, Iv */ #ifdef TARGET_X86_64 @@ -5606,16 +5634,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* 64 bit case */ tmp = x86_ldq_code(env, s); reg = (b & 7) | REX_B(s); - tcg_gen_movi_tl(cpu_T0, tmp); - gen_op_mov_reg_v(MO_64, reg, cpu_T0); + tcg_gen_movi_tl(s->T0, tmp); + gen_op_mov_reg_v(s, MO_64, reg, s->T0); } else #endif { ot = dflag; val = insn_get(env, s, ot); reg = (b & 7) | REX_B(s); - tcg_gen_movi_tl(cpu_T0, val); - gen_op_mov_reg_v(ot, reg, cpu_T0); + tcg_gen_movi_tl(s->T0, val); + gen_op_mov_reg_v(s, ot, reg, s->T0); } break; @@ -5634,17 +5662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod == 3) { rm = (modrm & 7) | REX_B(s); do_xchg_reg: - gen_op_mov_v_reg(ot, cpu_T0, reg); - gen_op_mov_v_reg(ot, cpu_T1, rm); - gen_op_mov_reg_v(ot, rm, cpu_T0); - gen_op_mov_reg_v(ot, reg, cpu_T1); + gen_op_mov_v_reg(s, ot, s->T0, reg); + gen_op_mov_v_reg(s, ot, s->T1, rm); + gen_op_mov_reg_v(s, ot, rm, s->T0); + gen_op_mov_reg_v(s, ot, reg, s->T1); } else { gen_lea_modrm(env, s, modrm); - gen_op_mov_v_reg(ot, cpu_T0, reg); + gen_op_mov_v_reg(s, ot, s->T0, reg); /* for xchg, lock is implicit */ - tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0, + tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0, s->mem_index, ot | MO_LE); - gen_op_mov_reg_v(ot, reg, cpu_T1); + gen_op_mov_reg_v(s, ot, reg, s->T1); } break; case 0xc4: /* les Gv */ @@ -5671,15 +5699,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod == 3) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + gen_op_ld_v(s, ot, s->T1, s->A0); gen_add_A0_im(s, 1 << ot); /* load the segment first to handle exceptions properly */ - gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0); + gen_op_ld_v(s, MO_16, s->T0, s->A0); gen_movl_seg_T0(s, op); /* then put the data */ - gen_op_mov_reg_v(ot, reg, cpu_T1); + gen_op_mov_reg_v(s, ot, reg, s->T1); if (s->base.is_jmp) { - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } break; @@ -5756,7 +5784,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { opreg = rm; } - gen_op_mov_v_reg(ot, cpu_T1, reg); + gen_op_mov_v_reg(s, ot, s->T1, reg); if (shift) { TCGv imm = tcg_const_tl(x86_ldub_code(env, s)); @@ -5794,25 +5822,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch(op >> 4) { case 0: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32); + gen_helper_flds_FT0(cpu_env, s->tmp2_i32); break; case 1: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32); + gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); break; case 2: - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64); + gen_helper_fldl_FT0(cpu_env, s->tmp1_i64); break; case 3: default: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LESW); - gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32); + gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); break; } @@ -5833,25 +5861,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0: switch(op >> 4) { case 0: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32); + 
gen_helper_flds_ST0(cpu_env, s->tmp2_i32); break; case 1: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32); + gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); break; case 2: - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64); + gen_helper_fldl_ST0(cpu_env, s->tmp1_i64); break; case 3: default: - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LESW); - gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32); + gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); break; } break; @@ -5859,19 +5887,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* XXX: the corresponding CPUID bit must be tested ! */ switch(op >> 4) { case 1: - gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); break; case 2: - gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, + gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); break; case 3: default: - gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fistt_ST0(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); break; } @@ -5880,24 +5908,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) default: switch(op >> 4) { case 0: - gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fsts_ST0(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); break; case 1: - gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fistl_ST0(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); break; case 2: - gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, + gen_helper_fstl_ST0(s->tmp1_i64, cpu_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); break; case 3: default: - gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fist_ST0(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); break; } @@ -5907,53 +5935,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } break; case 0x0c: /* fldenv mem */ - gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1)); + gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1)); break; case 0x0d: /* fldcw mem */ - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); - gen_helper_fldcw(cpu_env, cpu_tmp2_i32); + gen_helper_fldcw(cpu_env, s->tmp2_i32); break; case 0x0e: /* fnstenv mem */ - gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1)); + gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1)); break; case 0x0f: /* fnstcw mem */ - gen_helper_fnstcw(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fnstcw(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); break; case 0x1d: /* fldt mem */ - gen_helper_fldt_ST0(cpu_env, cpu_A0); + gen_helper_fldt_ST0(cpu_env, s->A0); break; case 0x1f: /* fstpt 
mem */ - gen_helper_fstt_ST0(cpu_env, cpu_A0); + gen_helper_fstt_ST0(cpu_env, s->A0); gen_helper_fpop(cpu_env); break; case 0x2c: /* frstor mem */ - gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1)); + gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1)); break; case 0x2e: /* fnsave mem */ - gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1)); + gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1)); break; case 0x2f: /* fnstsw mem */ - gen_helper_fnstsw(cpu_tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0, + gen_helper_fnstsw(s->tmp2_i32, cpu_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); break; case 0x3c: /* fbld */ - gen_helper_fbld_ST0(cpu_env, cpu_A0); + gen_helper_fbld_ST0(cpu_env, s->A0); break; case 0x3e: /* fbstp */ - gen_helper_fbst_ST0(cpu_env, cpu_A0); + gen_helper_fbst_ST0(cpu_env, s->A0); gen_helper_fpop(cpu_env); break; case 0x3d: /* fildll */ - tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ); - gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); + gen_helper_fildll_ST0(cpu_env, s->tmp1_i64); break; case 0x3f: /* fistpll */ - gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ); + gen_helper_fistll_ST0(s->tmp1_i64, cpu_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); gen_helper_fpop(cpu_env); break; default: @@ -6215,9 +6243,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x3c: /* df/4 */ switch(rm) { case 0: - gen_helper_fnstsw(cpu_tmp2_i32, cpu_env); - tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32); - gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); + gen_helper_fnstsw(s->tmp2_i32, cpu_env); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); break; default: goto unknown_op; @@ -6327,7 +6355,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x6c: /* insS */ case 0x6d: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]); + tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { @@ -6342,7 +6370,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x6e: /* outsS */ case 0x6f: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]); + tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes) | 4); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { @@ -6362,16 +6390,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xe5: ot = mo_b_d32(b, dflag); val = x86_ldub_code(env, s); - tcg_gen_movi_tl(cpu_T0, val); + tcg_gen_movi_tl(s->T0, val); gen_check_io(s, ot, pc_start - s->cs_base, SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - tcg_gen_movi_i32(cpu_tmp2_i32, val); - gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32); - gen_op_mov_reg_v(ot, R_EAX, cpu_T1); - gen_bpt_io(s, cpu_tmp2_i32, ot); + tcg_gen_movi_i32(s->tmp2_i32, val); + gen_helper_in_func(ot, s->T1, s->tmp2_i32); + gen_op_mov_reg_v(s, ot, R_EAX, s->T1); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); gen_jmp(s, s->pc - s->cs_base); @@ -6381,18 +6409,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xe7: ot = mo_b_d32(b, dflag); val = x86_ldub_code(env, s); - 
tcg_gen_movi_tl(cpu_T0, val); + tcg_gen_movi_tl(s->T0, val); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes)); - gen_op_mov_v_reg(ot, cpu_T1, R_EAX); + gen_op_mov_v_reg(s, ot, s->T1, R_EAX); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - tcg_gen_movi_i32(cpu_tmp2_i32, val); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1); - gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); - gen_bpt_io(s, cpu_tmp2_i32, ot); + tcg_gen_movi_i32(s->tmp2_i32, val); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); gen_jmp(s, s->pc - s->cs_base); @@ -6401,16 +6429,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xec: case 0xed: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]); + tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32); - gen_op_mov_reg_v(ot, R_EAX, cpu_T1); - gen_bpt_io(s, cpu_tmp2_i32, ot); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_in_func(ot, s->T1, s->tmp2_i32); + gen_op_mov_reg_v(s, ot, R_EAX, s->T1); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); gen_jmp(s, s->pc - s->cs_base); @@ -6419,18 +6447,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xee: case 0xef: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]); + tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes)); - gen_op_mov_v_reg(ot, cpu_T1, R_EAX); + gen_op_mov_v_reg(s, ot, s->T1, R_EAX); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1); - gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); - gen_bpt_io(s, cpu_tmp2_i32, ot); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); + gen_bpt_io(s, s->tmp2_i32, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); gen_jmp(s, s->pc - s->cs_base); @@ -6444,37 +6472,37 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); gen_stack_update(s, val + (1 << ot)); /* Note that gen_pop_T0 uses a zero-extending load. */ - gen_op_jmp_v(cpu_T0); + gen_op_jmp_v(s->T0); gen_bnd_jmp(s); - gen_jr(s, cpu_T0); + gen_jr(s, s->T0); break; case 0xc3: /* ret */ ot = gen_pop_T0(s); gen_pop_update(s, ot); /* Note that gen_pop_T0 uses a zero-extending load. 
*/ - gen_op_jmp_v(cpu_T0); + gen_op_jmp_v(s->T0); gen_bnd_jmp(s); - gen_jr(s, cpu_T0); + gen_jr(s, s->T0); break; case 0xca: /* lret im */ val = x86_ldsw_code(env, s); do_lret: if (s->pe && !s->vm86) { gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1), tcg_const_i32(val)); } else { gen_stack_A0(s); /* pop offset */ - gen_op_ld_v(s, dflag, cpu_T0, cpu_A0); + gen_op_ld_v(s, dflag, s->T0, s->A0); /* NOTE: keeping EIP updated is not a problem in case of exception */ - gen_op_jmp_v(cpu_T0); + gen_op_jmp_v(s->T0); /* pop selector */ gen_add_A0_im(s, 1 << dflag); - gen_op_ld_v(s, dflag, cpu_T0, cpu_A0); - gen_op_movl_seg_T0_vm(R_CS); + gen_op_ld_v(s, dflag, s->T0, s->A0); + gen_op_movl_seg_T0_vm(s, R_CS); /* add stack offset */ gen_stack_update(s, val + (2 << dflag)); } @@ -6517,8 +6545,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else if (!CODE64(s)) { tval &= 0xffffffff; } - tcg_gen_movi_tl(cpu_T0, next_eip); - gen_push_v(s, cpu_T0); + tcg_gen_movi_tl(s->T0, next_eip); + gen_push_v(s, s->T0); gen_bnd_jmp(s); gen_jmp(s, tval); } @@ -6533,8 +6561,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) offset = insn_get(env, s, ot); selector = insn_get(env, s, MO_16); - tcg_gen_movi_tl(cpu_T0, selector); - tcg_gen_movi_tl(cpu_T1, offset); + tcg_gen_movi_tl(s->T0, selector); + tcg_gen_movi_tl(s->T1, offset); } goto do_lcall; case 0xe9: /* jmp im */ @@ -6562,8 +6590,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) offset = insn_get(env, s, ot); selector = insn_get(env, s, MO_16); - tcg_gen_movi_tl(cpu_T0, selector); - tcg_gen_movi_tl(cpu_T1, offset); + tcg_gen_movi_tl(s->T0, selector); + tcg_gen_movi_tl(s->T1, offset); } goto do_ljmp; case 0xeb: /* jmp Jb */ @@ -6595,7 +6623,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x190 ... 0x19f: /* setcc Gv */ modrm = x86_ldub_code(env, s); - gen_setcc1(s, b, cpu_T0); + gen_setcc1(s, b, s->T0); gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1); break; case 0x140 ... 
0x14f: /* cmov Gv, Ev */ @@ -6616,8 +6644,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { gen_update_cc_op(s); - gen_helper_read_eflags(cpu_T0, cpu_env); - gen_push_v(s, cpu_T0); + gen_helper_read_eflags(s->T0, cpu_env); + gen_push_v(s, s->T0); } break; case 0x9d: /* popf */ @@ -6628,13 +6656,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); if (s->cpl == 0) { if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK))); } else { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) @@ -6643,14 +6671,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { if (s->cpl <= s->iopl) { if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK))); } else { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | @@ -6660,11 +6688,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } } else { if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK))); } else { - gen_helper_write_eflags(cpu_env, cpu_T0, + gen_helper_write_eflags(cpu_env, s->T0, tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff)); @@ -6674,26 +6702,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_pop_update(s, ot); set_cc_op(s, CC_OP_EFLAGS); /* abort translation because TF/AC flag may change */ - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } break; case 0x9e: /* sahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; - gen_op_mov_v_reg(MO_8, cpu_T0, R_AH); + gen_op_mov_v_reg(s, MO_8, s->T0, R_AH); gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); - tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0); + tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0); break; case 0x9f: /* lahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; gen_compute_eflags(s); /* Note: gen_compute_eflags() only gives the condition codes */ - tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02); - gen_op_mov_reg_v(MO_8, R_AH, cpu_T0); + tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02); + gen_op_mov_reg_v(s, MO_8, R_AH, s->T0); break; case 0xf5: /* cmc */ gen_compute_eflags(s); @@ -6708,12 +6736,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); break; case 0xfc: /* cld */ - tcg_gen_movi_i32(cpu_tmp2_i32, 1); - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df)); + tcg_gen_movi_i32(s->tmp2_i32, 1); + tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df)); break; case 0xfd: /* std */ - tcg_gen_movi_i32(cpu_tmp2_i32, -1); - tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df)); + tcg_gen_movi_i32(s->tmp2_i32, -1); + tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df)); break; /************************/ @@ -6728,14 +6756,14 @@ 
static target_ulong disas_insn(DisasContext *s, CPUState *cpu) s->rip_offset = 1; gen_lea_modrm(env, s, modrm); if (!(s->prefix & PREFIX_LOCK)) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); } /* load shift */ val = x86_ldub_code(env, s); - tcg_gen_movi_tl(cpu_T1, val); + tcg_gen_movi_tl(s->T1, val); if (op < 4) goto unknown_op; op -= 4; @@ -6757,70 +6785,70 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(MO_32, cpu_T1, reg); + gen_op_mov_v_reg(s, MO_32, s->T1, reg); if (mod != 3) { AddressParts a = gen_lea_modrm_0(env, s, modrm); /* specific case: we need to add a displacement */ - gen_exts(ot, cpu_T1); - tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot); - tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot); - tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0); - gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override); + gen_exts(ot, s->T1); + tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); + tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); + tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0); + gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); } } else { - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); } bt_op: - tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1); - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); + tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1); + tcg_gen_movi_tl(s->tmp0, 1); + tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1); if (s->prefix & PREFIX_LOCK) { switch (op) { case 0: /* bt */ /* Needs no atomic ops; we surpressed the normal memory load for LOCK above so do it now. */ - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + gen_op_ld_v(s, ot, s->T0, s->A0); break; case 1: /* bts */ - tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0, + tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0, s->mem_index, ot | MO_LE); break; case 2: /* btr */ - tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); - tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0, + tcg_gen_not_tl(s->tmp0, s->tmp0); + tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0, s->mem_index, ot | MO_LE); break; default: case 3: /* btc */ - tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0, + tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0, s->mem_index, ot | MO_LE); break; } - tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + tcg_gen_shr_tl(s->tmp4, s->T0, s->T1); } else { - tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + tcg_gen_shr_tl(s->tmp4, s->T0, s->T1); switch (op) { case 0: /* bt */ /* Data already loaded; nothing to do. 
*/ break; case 1: /* bts */ - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); + tcg_gen_or_tl(s->T0, s->T0, s->tmp0); break; case 2: /* btr */ - tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0); + tcg_gen_andc_tl(s->T0, s->T0, s->tmp0); break; default: case 3: /* btc */ - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0); + tcg_gen_xor_tl(s->T0, s->T0, s->tmp0); break; } if (op != 0) { if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + gen_op_st_v(s, ot, s->T0, s->A0); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_op_mov_reg_v(s, ot, rm, s->T0); } } } @@ -6844,13 +6872,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) We can get that same Z value (and the new C value) by leaving CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the same width. */ - tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); + tcg_gen_mov_tl(cpu_cc_src, s->tmp4); set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB); break; default: /* Otherwise, generate EFLAGS and replace the C bit. */ gen_compute_eflags(s); - tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4, + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4, ctz32(CC_C), 1); break; } @@ -6861,7 +6889,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_extu(ot, cpu_T0); + gen_extu(ot, s->T0); /* Note that lzcnt and tzcnt are in different extensions. */ if ((prefixes & PREFIX_REPZ) @@ -6870,23 +6898,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { int size = 8 << ot; /* For lzcnt/tzcnt, C bit is defined related to the input. */ - tcg_gen_mov_tl(cpu_cc_src, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, s->T0); if (b & 1) { /* For lzcnt, reduce the target_ulong result by the number of zeros that we expect to find at the top. */ - tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS); - tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size); + tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS); + tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size); } else { /* For tzcnt, a zero input must return the operand size. */ - tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size); + tcg_gen_ctzi_tl(s->T0, s->T0, size); } /* For lzcnt/tzcnt, Z bit is defined related to the result. */ - gen_op_update1_cc(); + gen_op_update1_cc(s); set_cc_op(s, CC_OP_BMILGB + ot); } else { /* For bsr/bsf, only the Z bit is defined and it is related to the input and not the result. */ - tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); set_cc_op(s, CC_OP_LOGICB + ot); /* ??? The manual says that the output is undefined when the @@ -6896,14 +6924,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (b & 1) { /* For bsr, return the bit index of the first 1 bit, not the count of leading zeros. 
*/ - tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1); - tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1); + tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1); + tcg_gen_clz_tl(s->T0, s->T0, s->T1); + tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1); } else { - tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]); + tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]); } } - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; /************************/ /* bcd */ @@ -6966,7 +6994,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (prefixes & PREFIX_REPZ) { gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start)); s->base.is_jmp = DISAS_NORETURN; } @@ -6994,7 +7022,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start)); break; #ifdef WANT_ICEBP @@ -7028,7 +7056,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) { gen_helper_sti(cpu_env); /* interruptions are enabled only the first insn after sti */ - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob_inhibit_irq(s, true); } else { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); @@ -7043,37 +7071,37 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; - gen_op_mov_v_reg(ot, cpu_T0, reg); + gen_op_mov_v_reg(s, ot, s->T0, reg); gen_lea_modrm(env, s, modrm); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); if (ot == MO_16) { - gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32); + gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32); } else { - gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32); + gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32); } break; case 0x1c8 ... 
0x1cf: /* bswap reg */ reg = (b & 7) | REX_B(s); #ifdef TARGET_X86_64 if (dflag == MO_64) { - gen_op_mov_v_reg(MO_64, cpu_T0, reg); - tcg_gen_bswap64_i64(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_64, reg, cpu_T0); + gen_op_mov_v_reg(s, MO_64, s->T0, reg); + tcg_gen_bswap64_i64(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_64, reg, s->T0); } else #endif { - gen_op_mov_v_reg(MO_32, cpu_T0, reg); - tcg_gen_ext32u_tl(cpu_T0, cpu_T0); - tcg_gen_bswap32_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_32, reg, cpu_T0); + gen_op_mov_v_reg(s, MO_32, s->T0, reg); + tcg_gen_ext32u_tl(s->T0, s->T0); + tcg_gen_bswap32_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_32, reg, s->T0); } break; case 0xd6: /* salc */ if (CODE64(s)) goto illegal_op; - gen_compute_eflags_c(s, cpu_T0); - tcg_gen_neg_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0); + gen_compute_eflags_c(s, s->T0); + tcg_gen_neg_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); break; case 0xe0: /* loopnz */ case 0xe1: /* loopz */ @@ -7096,26 +7124,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch(b) { case 0: /* loopnz */ case 1: /* loopz */ - gen_op_add_reg_im(s->aflag, R_ECX, -1); - gen_op_jz_ecx(s->aflag, l3); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_op_jz_ecx(s, s->aflag, l3); gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); break; case 2: /* loop */ - gen_op_add_reg_im(s->aflag, R_ECX, -1); - gen_op_jnz_ecx(s->aflag, l1); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_op_jnz_ecx(s, s->aflag, l1); break; default: case 3: /* jcxz */ - gen_op_jz_ecx(s->aflag, l1); + gen_op_jz_ecx(s, s->aflag, l1); break; } gen_set_label(l3); - gen_jmp_im(next_eip); + gen_jmp_im(s, next_eip); tcg_gen_br(l2); gen_set_label(l1); - gen_jmp_im(tval); + gen_jmp_im(s, tval); gen_set_label(l2); gen_eob(s); } @@ -7126,7 +7154,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); if (b & 2) { gen_helper_rdmsr(cpu_env); } else { @@ -7136,7 +7164,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x131: /* rdtsc */ gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } @@ -7148,7 +7176,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x133: /* rdpmc */ gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_rdpmc(cpu_env); break; case 0x134: /* sysenter */ @@ -7177,7 +7205,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x105: /* syscall */ /* XXX: is it usable in real mode ? */ gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start)); /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. 
This allows #DB to not be @@ -7203,7 +7231,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #endif case 0x1a2: /* cpuid */ gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_cpuid(cpu_env); break; case 0xf4: /* hlt */ @@ -7211,7 +7239,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start)); s->base.is_jmp = DISAS_NORETURN; } @@ -7225,7 +7253,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (!s->pe || s->vm86) goto illegal_op; gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ); - tcg_gen_ld32u_tl(cpu_T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, ldt.selector)); ot = mod == 3 ? dflag : MO_16; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); @@ -7238,15 +7266,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_lldt(cpu_env, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_lldt(cpu_env, s->tmp2_i32); } break; case 1: /* str */ if (!s->pe || s->vm86) goto illegal_op; gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ); - tcg_gen_ld32u_tl(cpu_T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, tr.selector)); ot = mod == 3 ? dflag : MO_16; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); @@ -7259,8 +7287,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0); - gen_helper_ltr(cpu_env, cpu_tmp2_i32); + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_ltr(cpu_env, s->tmp2_i32); } break; case 4: /* verr */ @@ -7270,9 +7298,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_update_cc_op(s); if (op == 4) { - gen_helper_verr(cpu_env, cpu_T0); + gen_helper_verr(cpu_env, s->T0); } else { - gen_helper_verw(cpu_env, cpu_T0); + gen_helper_verw(cpu_env, s->T0); } set_cc_op(s, CC_OP_EFLAGS); break; @@ -7287,15 +7315,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) CASE_MODRM_MEM_OP(0): /* sgdt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(cpu_T0, + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.limit)); - gen_op_st_v(s, MO_16, cpu_T0, cpu_A0); + gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); - tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base)); + tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base)); if (dflag == MO_16) { - tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff); + tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0); + gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break; case 0xc8: /* monitor */ @@ -7303,11 +7331,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); - tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]); - gen_extu(s->aflag, cpu_A0); + gen_jmp_im(s, pc_start - s->cs_base); + tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); 
+ gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); - gen_helper_monitor(cpu_env, cpu_A0); + gen_helper_monitor(cpu_env, s->A0); break; case 0xc9: /* mwait */ @@ -7315,7 +7343,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start)); gen_eob(s); break; @@ -7326,7 +7354,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_helper_clac(cpu_env); - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -7336,21 +7364,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_helper_stac(cpu_env); - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; CASE_MODRM_MEM_OP(1): /* sidt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit)); - gen_op_st_v(s, MO_16, cpu_T0, cpu_A0); + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit)); + gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); - tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base)); + tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base)); if (dflag == MO_16) { - tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff); + tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0); + gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break; case 0xd0: /* xgetbv */ @@ -7359,9 +7387,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) | PREFIX_REPZ | PREFIX_REPNZ))) { goto illegal_op; } - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); - gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32); - tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); + gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32); + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64); break; case 0xd1: /* xsetbv */ @@ -7374,12 +7402,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; } - tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); - gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); + gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64); /* End TB because translation flags may change. 
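A minimal sketch (plain C, not part of the patch) of what tcg_gen_concat_tl_i64() and tcg_gen_extr_i64_tl() do for xgetbv/xsetbv here, and again for rdpkru/wrpkru further down: the 64-bit value is simply packed into and unpacked from the EDX:EAX register pair.

#include <assert.h>
#include <stdint.h>

static uint64_t concat_edx_eax(uint32_t eax, uint32_t edx)
{
    return ((uint64_t)edx << 32) | eax;      /* EDX holds the high half */
}

static void extract_edx_eax(uint64_t v, uint32_t *eax, uint32_t *edx)
{
    *eax = (uint32_t)v;
    *edx = (uint32_t)(v >> 32);
}

int main(void)
{
    uint32_t lo, hi;
    extract_edx_eax(concat_edx_eax(0x89abcdefu, 0x01234567u), &lo, &hi);
    assert(lo == 0x89abcdefu && hi == 0x01234567u);
    return 0;
}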
*/ - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -7392,7 +7420,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1), tcg_const_i32(s->pc - pc_start)); tcg_gen_exit_tb(NULL, 0); @@ -7404,7 +7432,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_vmmcall(cpu_env); break; @@ -7417,7 +7445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1)); break; @@ -7430,7 +7458,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1)); break; @@ -7446,7 +7474,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_update_cc_op(s); gen_helper_stgi(cpu_env); - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -7459,7 +7487,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_clgi(cpu_env); break; @@ -7470,7 +7498,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_skinit(cpu_env); break; @@ -7483,7 +7511,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1)); break; @@ -7494,14 +7522,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE); gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0); + gen_op_ld_v(s, MO_16, s->T1, s->A0); gen_add_A0_im(s, 2); - gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0); + gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0); if (dflag == MO_16) { - tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff); + tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base)); - tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit)); + tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base)); + tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit)); break; CASE_MODRM_MEM_OP(3): /* lidt */ @@ -7511,19 +7539,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE); gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0); + gen_op_ld_v(s, MO_16, s->T1, s->A0); gen_add_A0_im(s, 2); - gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0); + gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0); if (dflag == MO_16) { - tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff); + tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base)); - tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit)); + tcg_gen_st_tl(s->T0, 
cpu_env, offsetof(CPUX86State, idt.base)); + tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit)); break; CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); - tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0])); + tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0])); if (CODE64(s)) { mod = (modrm >> 6) & 3; ot = (mod != 3 ? MO_16 : s->dflag); @@ -7536,18 +7564,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (prefixes & PREFIX_LOCK) { goto illegal_op; } - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); - gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32); - tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); + gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32); + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64); break; case 0xef: /* wrpkru */ if (prefixes & PREFIX_LOCK) { goto illegal_op; } - tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); - gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); + gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64); break; CASE_MODRM_OP(6): /* lmsw */ if (s->cpl != 0) { @@ -7556,8 +7584,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - gen_helper_lmsw(cpu_env, cpu_T0); - gen_jmp_im(s->pc - s->cs_base); + gen_helper_lmsw(cpu_env, s->T0); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -7567,10 +7595,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); gen_lea_modrm(env, s, modrm); - gen_helper_invlpg(cpu_env, cpu_A0); - gen_jmp_im(s->pc - s->cs_base); + gen_helper_invlpg(cpu_env, s->A0); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -7580,10 +7608,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]); + tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]); tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env, offsetof(CPUX86State, kernelgsbase)); - tcg_gen_st_tl(cpu_T0, cpu_env, + tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, kernelgsbase)); } break; @@ -7596,7 +7624,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } @@ -7634,16 +7662,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) rm = (modrm & 7) | REX_B(s); if (mod == 3) { - gen_op_mov_v_reg(MO_32, cpu_T0, rm); + gen_op_mov_v_reg(s, MO_32, s->T0, rm); /* sign extend */ if (d_ot == MO_64) { - tcg_gen_ext32s_tl(cpu_T0, cpu_T0); + tcg_gen_ext32s_tl(s->T0, s->T0); } - gen_op_mov_reg_v(d_ot, reg, cpu_T0); + gen_op_mov_reg_v(s, d_ot, reg, s->T0); } else { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0); - gen_op_mov_reg_v(d_ot, reg, cpu_T0); + gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0); + gen_op_mov_reg_v(s, d_ot, reg, s->T0); } } else #endif @@ -7663,19 +7691,19 @@ static target_ulong 
disas_insn(DisasContext *s, CPUState *cpu) rm = modrm & 7; if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, t0, cpu_A0); + gen_op_ld_v(s, ot, t0, s->A0); a0 = tcg_temp_local_new(); - tcg_gen_mov_tl(a0, cpu_A0); + tcg_gen_mov_tl(a0, s->A0); } else { - gen_op_mov_v_reg(ot, t0, rm); + gen_op_mov_v_reg(s, ot, t0, rm); a0 = NULL; } - gen_op_mov_v_reg(ot, t1, reg); - tcg_gen_andi_tl(cpu_tmp0, t0, 3); + gen_op_mov_v_reg(s, ot, t1, reg); + tcg_gen_andi_tl(s->tmp0, t0, 3); tcg_gen_andi_tl(t1, t1, 3); tcg_gen_movi_tl(t2, 0); label1 = gen_new_label(); - tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1); + tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1); tcg_gen_andi_tl(t0, t0, ~3); tcg_gen_or_tl(t0, t0, t1); tcg_gen_movi_tl(t2, CC_Z); @@ -7684,7 +7712,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_st_v(s, ot, t0, a0); tcg_temp_free(a0); } else { - gen_op_mov_reg_v(ot, rm, t0); + gen_op_mov_reg_v(s, ot, rm, t0); } gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); @@ -7708,14 +7736,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) t0 = tcg_temp_local_new(); gen_update_cc_op(s); if (b == 0x102) { - gen_helper_lar(t0, cpu_env, cpu_T0); + gen_helper_lar(t0, cpu_env, s->T0); } else { - gen_helper_lsl(t0, cpu_env, cpu_T0); + gen_helper_lsl(t0, cpu_env, s->T0); } - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z); + tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z); label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); - gen_op_mov_reg_v(ot, reg, t0); + tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); + gen_op_mov_reg_v(s, ot, reg, t0); gen_set_label(label1); set_cc_op(s, CC_OP_EFLAGS); tcg_temp_free(t0); @@ -7781,16 +7809,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { gen_lea_modrm(env, s, modrm); if (CODE64(s)) { - tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0, + tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEQ); - tcg_gen_addi_tl(cpu_A0, cpu_A0, 8); - tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0, + tcg_gen_addi_tl(s->A0, s->A0, 8); + tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0, s->mem_index, MO_LEQ); } else { - tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0, + tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEUL); - tcg_gen_addi_tl(cpu_A0, cpu_A0, 4); - tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0, + tcg_gen_addi_tl(s->A0, s->A0, 4); + tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0, s->mem_index, MO_LEUL); } /* bnd registers are now in-use */ @@ -7806,22 +7834,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (a.base >= 0) { - tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp); + tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp); } else { - tcg_gen_movi_tl(cpu_A0, 0); + tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (a.index >= 0) { - tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]); + tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { - tcg_gen_movi_tl(cpu_T0, 0); + tcg_gen_movi_tl(s->T0, 0); } if (CODE64(s)) { - gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0); + gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0); tcg_gen_ld_i64(cpu_bndu[reg], cpu_env, offsetof(CPUX86State, mmx_t0.MMX_Q(0))); } else { - gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0); + gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0); tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]); tcg_gen_shri_i64(cpu_bndu[reg], 
cpu_bndu[reg], 32); } @@ -7855,11 +7883,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* rip-relative generates #ud */ goto illegal_op; } - tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a)); + tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a)); if (!CODE64(s)) { - tcg_gen_ext32u_tl(cpu_A0, cpu_A0); + tcg_gen_ext32u_tl(s->A0, s->A0); } - tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0); + tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0); /* bnd registers are now in-use */ gen_set_hflag(s, HF_MPX_IU_MASK); break; @@ -7888,16 +7916,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } else { gen_lea_modrm(env, s, modrm); if (CODE64(s)) { - tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0, + tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEQ); - tcg_gen_addi_tl(cpu_A0, cpu_A0, 8); - tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0, + tcg_gen_addi_tl(s->A0, s->A0, 8); + tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0, s->mem_index, MO_LEQ); } else { - tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0, + tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEUL); - tcg_gen_addi_tl(cpu_A0, cpu_A0, 4); - tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0, + tcg_gen_addi_tl(s->A0, s->A0, 4); + tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0, s->mem_index, MO_LEUL); } } @@ -7911,21 +7939,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (a.base >= 0) { - tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp); + tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp); } else { - tcg_gen_movi_tl(cpu_A0, 0); + tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (a.index >= 0) { - tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]); + tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { - tcg_gen_movi_tl(cpu_T0, 0); + tcg_gen_movi_tl(s->T0, 0); } if (CODE64(s)) { - gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0, + gen_helper_bndstx64(cpu_env, s->A0, s->T0, cpu_bndl[reg], cpu_bndu[reg]); } else { - gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0, + gen_helper_bndstx32(cpu_env, s->A0, s->T0, cpu_bndl[reg], cpu_bndu[reg]); } } @@ -7964,25 +7992,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 4: case 8: gen_update_cc_op(s); - gen_jmp_im(pc_start - s->cs_base); + gen_jmp_im(s, pc_start - s->cs_base); if (b & 2) { if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - gen_op_mov_v_reg(ot, cpu_T0, rm); + gen_op_mov_v_reg(s, ot, s->T0, rm); gen_helper_write_crN(cpu_env, tcg_const_i32(reg), - cpu_T0); + s->T0); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); } - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } else { if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg)); - gen_op_mov_reg_v(ot, rm, cpu_T0); + gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg)); + gen_op_mov_reg_v(s, ot, rm, s->T0); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_end(); } @@ -8015,16 +8043,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (b & 2) { gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg); - gen_op_mov_v_reg(ot, cpu_T0, rm); - tcg_gen_movi_i32(cpu_tmp2_i32, reg); - gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0); - gen_jmp_im(s->pc - s->cs_base); + gen_op_mov_v_reg(s, ot, s->T0, rm); + tcg_gen_movi_i32(s->tmp2_i32, reg); + gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } else { 
gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg); - tcg_gen_movi_i32(cpu_tmp2_i32, reg); - gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32); - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_movi_i32(s->tmp2_i32, reg); + gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32); + gen_op_mov_reg_v(s, ot, rm, s->T0); } } break; @@ -8035,7 +8063,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); gen_helper_clts(cpu_env); /* abort block because static cpu state changed */ - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); } break; @@ -8065,7 +8093,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_lea_modrm(env, s, modrm); - gen_helper_fxsave(cpu_env, cpu_A0); + gen_helper_fxsave(cpu_env, s->A0); break; CASE_MODRM_MEM_OP(1): /* fxrstor */ @@ -8078,7 +8106,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_lea_modrm(env, s, modrm); - gen_helper_fxrstor(cpu_env, cpu_A0); + gen_helper_fxrstor(cpu_env, s->A0); break; CASE_MODRM_MEM_OP(2): /* ldmxcsr */ @@ -8090,8 +8118,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_lea_modrm(env, s, modrm); - tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL); - gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32); + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + gen_helper_ldmxcsr(cpu_env, s->tmp2_i32); break; CASE_MODRM_MEM_OP(3): /* stmxcsr */ @@ -8103,8 +8131,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr)); - gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); + tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr)); + gen_op_st_v(s, MO_32, s->T0, s->A0); break; CASE_MODRM_MEM_OP(4): /* xsave */ @@ -8114,9 +8142,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64); + gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64); break; CASE_MODRM_MEM_OP(5): /* xrstor */ @@ -8126,13 +8154,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64); + gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64); /* XRSTOR is how MPX is enabled, which changes how we translate. Thus we need to end the TB. */ gen_update_cc_op(s); - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_eob(s); break; @@ -8154,9 +8182,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64); + gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64); } break; @@ -8190,8 +8218,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) TCGv base, treg, src, dst; /* Preserve hflags bits by testing CR4 at runtime. 
*/ - tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK); - gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32); + tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK); + gen_helper_cr4_testbit(cpu_env, s->tmp2_i32); base = cpu_seg_base[modrm & 8 ? R_GS : R_FS]; treg = cpu_regs[(modrm & 7) | REX_B(s)]; @@ -8262,7 +8290,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (!(s->flags & HF_SMM_MASK)) goto illegal_op; gen_update_cc_op(s); - gen_jmp_im(s->pc - s->cs_base); + gen_jmp_im(s, s->pc - s->cs_base); gen_helper_rsm(cpu_env); gen_eob(s); break; @@ -8283,10 +8311,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_extu(ot, cpu_T0); - tcg_gen_mov_tl(cpu_cc_src, cpu_T0); - tcg_gen_ctpop_tl(cpu_T0, cpu_T0); - gen_op_mov_reg_v(ot, reg, cpu_T0); + gen_extu(ot, s->T0); + tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_ctpop_tl(s->T0, s->T0); + gen_op_mov_reg_v(s, ot, reg, s->T0); set_cc_op(s, CC_OP_POPCNT); break; @@ -8452,18 +8480,18 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) printf("ERROR addseg\n"); #endif - cpu_T0 = tcg_temp_new(); - cpu_T1 = tcg_temp_new(); - cpu_A0 = tcg_temp_new(); + dc->T0 = tcg_temp_new(); + dc->T1 = tcg_temp_new(); + dc->A0 = tcg_temp_new(); - cpu_tmp0 = tcg_temp_new(); - cpu_tmp1_i64 = tcg_temp_new_i64(); - cpu_tmp2_i32 = tcg_temp_new_i32(); - cpu_tmp3_i32 = tcg_temp_new_i32(); - cpu_tmp4 = tcg_temp_new(); - cpu_ptr0 = tcg_temp_new_ptr(); - cpu_ptr1 = tcg_temp_new_ptr(); - cpu_cc_srcT = tcg_temp_local_new(); + dc->tmp0 = tcg_temp_new(); + dc->tmp1_i64 = tcg_temp_new_i64(); + dc->tmp2_i32 = tcg_temp_new_i32(); + dc->tmp3_i32 = tcg_temp_new_i32(); + dc->tmp4 = tcg_temp_new(); + dc->ptr0 = tcg_temp_new_ptr(); + dc->ptr1 = tcg_temp_new_ptr(); + dc->cc_srcT = tcg_temp_local_new(); } static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -8510,10 +8538,10 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) chance to happen */ dc->base.is_jmp = DISAS_TOO_MANY; } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT) - && ((dc->base.pc_next & TARGET_PAGE_MASK) - != ((dc->base.pc_next + TARGET_MAX_INSN_SIZE - 1) + && ((pc_next & TARGET_PAGE_MASK) + != ((pc_next + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK) - || (dc->base.pc_next & ~TARGET_PAGE_MASK) == 0)) { + || (pc_next & ~TARGET_PAGE_MASK) == 0)) { /* Do not cross the boundary of the pages in icount mode, it can cause an exception. Do it only when boundary is crossed by the first instruction in the block. 
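The hunk above shows the core of the change running through all of these translate.c diffs: the TCG scratch temporaries (cpu_T0, cpu_T1, cpu_A0, cpu_tmp*) stop being file-scope globals and become fields of DisasContext, allocated per translation and passed explicitly to every emitter. A toy analogue of the pattern, with invented names (ToyDisasContext, toy_*) since the real code needs the QEMU/TCG headers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct ToyDisasContext {
    uint64_t T0, T1;            /* scratch "registers", formerly globals  */
    uint64_t A0;                /* scratch address, formerly a global     */
} ToyDisasContext;

static void toy_gen_movi(ToyDisasContext *s, uint64_t imm)
{
    s->T0 = imm;                /* was: cpu_T0 = imm;                     */
}

static void toy_gen_addi(ToyDisasContext *s, uint64_t imm)
{
    s->T0 += imm;
}

static uint64_t toy_translate(uint64_t start)
{
    ToyDisasContext dc = { 0 }; /* per call, like dc->T0 = tcg_temp_new() */
    toy_gen_movi(&dc, start);
    toy_gen_addi(&dc, 4);
    return dc.T0;
}

int main(void)
{
    printf("%" PRIu64 "\n", toy_translate(10));   /* prints 14 */
    return 0;
}

Because the context is created afresh for each translation and threaded through every call, no hidden global state can leak between translations, which is what the wholesale cpu_T0 -> s->T0 conversion above buys.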
@@ -8533,7 +8561,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) DisasContext *dc = container_of(dcbase, DisasContext, base); if (dc->base.is_jmp == DISAS_TOO_MANY) { - gen_jmp_im(dc->base.pc_next - dc->cs_base); + gen_jmp_im(dc, dc->base.pc_next - dc->cs_base); gen_eob(dc); } } diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 497706b669..e217fb3e36 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -113,11 +113,20 @@ static void mips_cpu_reset(CPUState *s) } static void mips_cpu_disas_set_info(CPUState *s, disassemble_info *info) { + MIPSCPU *cpu = MIPS_CPU(s); + CPUMIPSState *env = &cpu->env; + + if (!(env->insn_flags & ISA_NANOMIPS32)) { #ifdef TARGET_WORDS_BIGENDIAN - info->print_insn = print_insn_big_mips; + info->print_insn = print_insn_big_mips; #else - info->print_insn = print_insn_little_mips; + info->print_insn = print_insn_little_mips; #endif + } else { +#if defined(CONFIG_NANOMIPS_DIS) + info->print_insn = print_insn_nanomips; +#endif + } } static void mips_cpu_realizefn(DeviceState *dev, Error **errp) diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 28af4d191c..03c03fd8c6 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -170,6 +170,16 @@ struct TCState { MSACSR_FS_MASK) float_status msa_fp_status; + +#define NUMBER_OF_MXU_REGISTERS 16 + target_ulong mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1]; + target_ulong mxu_cr; +#define MXU_CR_LC 31 +#define MXU_CR_RC 30 +#define MXU_CR_BIAS 2 +#define MXU_CR_RD_EN 1 +#define MXU_CR_MXU_EN 0 + }; typedef struct CPUMIPSState CPUMIPSState; @@ -195,10 +205,125 @@ struct CPUMIPSState { #define MSAIR_ProcID 8 #define MSAIR_Rev 0 +/* + * Summary of CP0 registers + * ======================== + * + * + * Register 0 Register 1 Register 2 Register 3 + * ---------- ---------- ---------- ---------- + * + * 0 Index Random EntryLo0 EntryLo1 + * 1 MVPControl VPEControl TCStatus GlobalNumber + * 2 MVPConf0 VPEConf0 TCBind + * 3 MVPConf1 VPEConf1 TCRestart + * 4 VPControl YQMask TCHalt + * 5 VPESchedule TCContext + * 6 VPEScheFBack TCSchedule + * 7 VPEOpt TCScheFBack TCOpt + * + * + * Register 4 Register 5 Register 6 Register 7 + * ---------- ---------- ---------- ---------- + * + * 0 Context PageMask Wired HWREna + * 1 ContextConfig PageGrain SRSConf0 + * 2 UserLocal SegCtl0 SRSConf1 + * 3 XContextConfig SegCtl1 SRSConf2 + * 4 DebugContextID SegCtl2 SRSConf3 + * 5 MemoryMapID PWBase SRSConf4 + * 6 PWField PWCtl + * 7 PWSize + * + * + * Register 8 Register 9 Register 10 Register 11 + * ---------- ---------- ----------- ----------- + * + * 0 BadVAddr Count EntryHi Compare + * 1 BadInstr + * 2 BadInstrP + * 3 BadInstrX + * 4 GuestCtl1 GuestCtl0Ext + * 5 GuestCtl2 + * 6 GuestCtl3 + * 7 + * + * + * Register 12 Register 13 Register 14 Register 15 + * ----------- ----------- ----------- ----------- + * + * 0 Status Cause EPC PRId + * 1 IntCtl EBase + * 2 SRSCtl NestedEPC CDMMBase + * 3 SRSMap CMGCRBase + * 4 View_IPL View_RIPL BEVVA + * 5 SRSMap2 NestedExc + * 6 GuestCtl0 + * 7 GTOffset + * + * + * Register 16 Register 17 Register 18 Register 19 + * ----------- ----------- ----------- ----------- + * + * 0 Config LLAddr WatchLo WatchHi + * 1 Config1 MAAR WatchLo WatchHi + * 2 Config2 MAARI WatchLo WatchHi + * 3 Config3 WatchLo WatchHi + * 4 Config4 WatchLo WatchHi + * 5 Config5 WatchLo WatchHi + * 6 WatchLo WatchHi + * 7 WatchLo WatchHi + * + * + * Register 20 Register 21 Register 22 Register 23 + * ----------- ----------- ----------- ----------- + * + * 0 XContext Debug + * 1 TraceControl + * 2 TraceControl2 + 
* 3 UserTraceData1 + * 4 TraceIBPC + * 5 TraceDBPC + * 6 Debug2 + * 7 + * + * + * Register 24 Register 25 Register 26 Register 27 + * ----------- ----------- ----------- ----------- + * + * 0 DEPC PerfCnt ErrCtl CacheErr + * 1 PerfCnt + * 2 TraceControl3 PerfCnt + * 3 UserTraceData2 PerfCnt + * 4 PerfCnt + * 5 PerfCnt + * 6 PerfCnt + * 7 PerfCnt + * + * + * Register 28 Register 29 Register 30 Register 31 + * ----------- ----------- ----------- ----------- + * + * 0 DataLo DataHi ErrorEPC DESAVE + * 1 TagLo TagHi + * 2 DataLo DataHi KScratch<n> + * 3 TagLo TagHi KScratch<n> + * 4 DataLo DataHi KScratch<n> + * 5 TagLo TagHi KScratch<n> + * 6 DataLo DataHi KScratch<n> + * 7 TagLo TagHi KScratch<n> + * + */ +/* + * CP0 Register 0 + */ int32_t CP0_Index; /* CP0_MVP* are per MVP registers. */ int32_t CP0_VPControl; #define CP0VPCtl_DIS 0 +/* + * CP0 Register 1 + */ int32_t CP0_Random; int32_t CP0_VPEControl; #define CP0VPECo_YSI 21 @@ -239,7 +364,13 @@ struct CPUMIPSState { #define CP0VPEOpt_DWX2 2 #define CP0VPEOpt_DWX1 1 #define CP0VPEOpt_DWX0 0 +/* + * CP0 Register 2 + */ uint64_t CP0_EntryLo0; +/* + * CP0 Register 3 + */ uint64_t CP0_EntryLo1; #if defined(TARGET_MIPS64) # define CP0EnLo_RI 63 @@ -250,8 +381,14 @@ struct CPUMIPSState { #endif int32_t CP0_GlobalNumber; #define CP0GN_VPId 0 +/* + * CP0 Register 4 + */ target_ulong CP0_Context; target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM]; +/* + * CP0 Register 5 + */ int32_t CP0_PageMask; int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; @@ -289,7 +426,47 @@ struct CPUMIPSState { #define CP0SC2_XR 56 #define CP0SC2_XR_MASK (0xFFULL << CP0SC2_XR) #define CP0SC2_MASK (CP0SC_1GMASK | (CP0SC_1GMASK << 16) | CP0SC2_XR_MASK) + target_ulong CP0_PWBase; + target_ulong CP0_PWField; +#if defined(TARGET_MIPS64) +#define CP0PF_BDI 32 /* 37..32 */ +#define CP0PF_GDI 24 /* 29..24 */ +#define CP0PF_UDI 18 /* 23..18 */ +#define CP0PF_MDI 12 /* 17..12 */ +#define CP0PF_PTI 6 /* 11..6 */ +#define CP0PF_PTEI 0 /* 5..0 */ +#else +#define CP0PF_GDW 24 /* 29..24 */ +#define CP0PF_UDW 18 /* 23..18 */ +#define CP0PF_MDW 12 /* 17..12 */ +#define CP0PF_PTW 6 /* 11..6 */ +#define CP0PF_PTEW 0 /* 5..0 */ +#endif + target_ulong CP0_PWSize; +#if defined(TARGET_MIPS64) +#define CP0PS_BDW 32 /* 37..32 */ +#endif +#define CP0PS_PS 30 +#define CP0PS_GDW 24 /* 29..24 */ +#define CP0PS_UDW 18 /* 23..18 */ +#define CP0PS_MDW 12 /* 17..12 */ +#define CP0PS_PTW 6 /* 11..6 */ +#define CP0PS_PTEW 0 /* 5..0 */ +/* + * CP0 Register 6 + */ int32_t CP0_Wired; + int32_t CP0_PWCtl; +#define CP0PC_PWEN 31 +#if defined(TARGET_MIPS64) +#define CP0PC_PWDIREXT 30 +#define CP0PC_XK 28 +#define CP0PC_XS 27 +#define CP0PC_XU 26 +#endif +#define CP0PC_DPH 7 +#define CP0PC_HUGEPG 6 +#define CP0PC_PSN 0 /* 5..0 */ int32_t CP0_SRSConf0_rw_bitmask; int32_t CP0_SRSConf0; #define CP0SRSC0_M 31 @@ -319,16 +496,34 @@ struct CPUMIPSState { #define CP0SRSC4_SRS15 20 #define CP0SRSC4_SRS14 10 #define CP0SRSC4_SRS13 0 +/* + * CP0 Register 7 + */ int32_t CP0_HWREna; +/* + * CP0 Register 8 + */ target_ulong CP0_BadVAddr; uint32_t CP0_BadInstr; uint32_t CP0_BadInstrP; uint32_t CP0_BadInstrX; +/* + * CP0 Register 9 + */ int32_t CP0_Count; +/* + * CP0 Register 10 + */ target_ulong CP0_EntryHi; #define CP0EnHi_EHINV 10 target_ulong CP0_EntryHi_ASID_mask; +/* + * CP0 Register 11 + */ int32_t CP0_Compare; +/* + * CP0 Register 12 + */ int32_t CP0_Status; #define CP0St_CU3 31 #define CP0St_CU2 30 @@ -370,6 +565,9 @@ struct CPUMIPSState { #define CP0SRSMap_SSV2 8 #define CP0SRSMap_SSV1 4 #define CP0SRSMap_SSV0 0 +/* 
+ * CP0 Register 13 + */ int32_t CP0_Cause; #define CP0Ca_BD 31 #define CP0Ca_TI 30 @@ -381,12 +579,21 @@ struct CPUMIPSState { #define CP0Ca_IP 8 #define CP0Ca_IP_mask 0x0000FF00 #define CP0Ca_EC 2 +/* + * CP0 Register 14 + */ target_ulong CP0_EPC; +/* + * CP0 Register 15 + */ int32_t CP0_PRid; target_ulong CP0_EBase; target_ulong CP0_EBaseWG_rw_bitmask; #define CP0EBase_WG 11 target_ulong CP0_CMGCRBase; +/* + * CP0 Register 16 + */ int32_t CP0_Config0; #define CP0C0_M 31 #define CP0C0_K23 28 /* 30..28 */ @@ -503,6 +710,9 @@ struct CPUMIPSState { uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; /* XXX: Maybe make LLAddr per-TC? */ +/* + * CP0 Register 17 + */ uint64_t lladdr; target_ulong llval; target_ulong llnewval; @@ -511,11 +721,23 @@ struct CPUMIPSState { target_ulong llreg; uint64_t CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; +/* + * CP0 Register 18 + */ target_ulong CP0_WatchLo[8]; +/* + * CP0 Register 19 + */ int32_t CP0_WatchHi[8]; #define CP0WH_ASID 16 +/* + * CP0 Register 20 + */ target_ulong CP0_XContext; int32_t CP0_Framemask; +/* + * CP0 Register 23 + */ int32_t CP0_Debug; #define CP0DB_DBD 31 #define CP0DB_DM 30 @@ -535,18 +757,40 @@ struct CPUMIPSState { #define CP0DB_DDBL 2 #define CP0DB_DBp 1 #define CP0DB_DSS 0 +/* + * CP0 Register 24 + */ target_ulong CP0_DEPC; +/* + * CP0 Register 25 + */ int32_t CP0_Performance0; +/* + * CP0 Register 26 + */ int32_t CP0_ErrCtl; #define CP0EC_WST 29 #define CP0EC_SPR 28 #define CP0EC_ITC 26 +/* + * CP0 Register 28 + */ uint64_t CP0_TagLo; int32_t CP0_DataLo; +/* + * CP0 Register 29 + */ int32_t CP0_TagHi; int32_t CP0_DataHi; +/* + * CP0 Register 30 + */ target_ulong CP0_ErrorEPC; +/* + * CP0 Register 31 + */ int32_t CP0_DESAVE; + /* We waste some space so we can handle shadow registers like TCs. */ TCState tcs[MIPS_SHADOW_SET_MAX]; CPUMIPSFPUContext fpus[MIPS_FPU_MAX]; @@ -596,8 +840,9 @@ struct CPUMIPSState { #define MIPS_HFLAG_BX 0x40000 /* branch exchanges execution mode */ #define MIPS_HFLAG_BMASK (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT) /* MIPS DSP resources access. */ -#define MIPS_HFLAG_DSP 0x080000 /* Enable access to MIPS DSP resources. */ -#define MIPS_HFLAG_DSPR2 0x100000 /* Enable access to MIPS DSPR2 resources. */ +#define MIPS_HFLAG_DSP 0x080000 /* Enable access to DSP resources. */ +#define MIPS_HFLAG_DSP_R2 0x100000 /* Enable access to DSP R2 resources. */ +#define MIPS_HFLAG_DSP_R3 0x20000000 /* Enable access to DSP R3 resources. */ /* Extra flag about HWREna register. */ #define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */ #define MIPS_HFLAG_SBRI 0x400000 /* R6 SDBBP causes RI excpt. in user mode */ @@ -614,7 +859,7 @@ struct CPUMIPSState { int CCRes; /* Cycle count resolution/divisor */ uint32_t CP0_Status_rw_bitmask; /* Read/write bits in CP0_Status */ uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */ - int insn_flags; /* Supported instruction set */ + uint64_t insn_flags; /* Supported instruction set */ /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; diff --git a/target/mips/helper.c b/target/mips/helper.c index f0c268b83c..8988452dbd 100644 --- a/target/mips/helper.c +++ b/target/mips/helper.c @@ -537,6 +537,342 @@ hwaddr mips_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } #endif +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + +/* + * Perform hardware page table walk + * + * Memory accesses are performed using the KERNEL privilege level. 
+ * Synchronous exceptions detected on memory accesses cause a silent exit + * from page table walking, resulting in a TLB or XTLB Refill exception. + * + * Implementations are not required to support page table walk memory + * accesses from mapped memory regions. When an unsupported access is + * attempted, a silent exit is taken, resulting in a TLB or XTLB Refill + * exception. + * + * Note that if an exception is caused by AddressTranslation or LoadMemory + * functions, the exception is not taken, a silent exit is taken, + * resulting in a TLB or XTLB Refill exception. + */ + +static bool get_pte(CPUMIPSState *env, uint64_t vaddr, int entry_size, + uint64_t *pte) +{ + if ((vaddr & ((entry_size >> 3) - 1)) != 0) { + return false; + } + if (entry_size == 64) { + *pte = cpu_ldq_code(env, vaddr); + } else { + *pte = cpu_ldl_code(env, vaddr); + } + return true; +} + +static uint64_t get_tlb_entry_layout(CPUMIPSState *env, uint64_t entry, + int entry_size, int ptei) +{ + uint64_t result = entry; + uint64_t rixi; + if (ptei > entry_size) { + ptei -= 32; + } + result >>= (ptei - 2); + rixi = result & 3; + result >>= 2; + result |= rixi << CP0EnLo_XI; + return result; +} + +static int walk_directory(CPUMIPSState *env, uint64_t *vaddr, + int directory_index, bool *huge_page, bool *hgpg_directory_hit, + uint64_t *pw_entrylo0, uint64_t *pw_entrylo1) +{ + int dph = (env->CP0_PWCtl >> CP0PC_DPH) & 0x1; + int psn = (env->CP0_PWCtl >> CP0PC_PSN) & 0x3F; + int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1; + int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F; + int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3; + int directory_shift = (ptew > 1) ? -1 : + (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift; + int leaf_shift = (ptew > 1) ? -1 : + (ptew == 1) ? 
native_shift + 1 : native_shift; + uint32_t direntry_size = 1 << (directory_shift + 3); + uint32_t leafentry_size = 1 << (leaf_shift + 3); + uint64_t entry; + uint64_t paddr; + int prot; + uint64_t lsb = 0; + uint64_t w = 0; + + if (get_physical_address(env, &paddr, &prot, *vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + /* wrong base address */ + return 0; + } + if (!get_pte(env, *vaddr, direntry_size, &entry)) { + return 0; + } + + if ((entry & (1 << psn)) && hugepg) { + *huge_page = true; + *hgpg_directory_hit = true; + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + w = directory_index - 1; + if (directory_index & 0x1) { + /* Generate adjacent page from same PTE for odd TLB page */ + lsb = (1 << w) >> 6; + *pw_entrylo0 = entry & ~lsb; /* even page */ + *pw_entrylo1 = entry | lsb; /* odd page */ + } else if (dph) { + int oddpagebit = 1 << leaf_shift; + uint64_t vaddr2 = *vaddr ^ oddpagebit; + if (*vaddr & oddpagebit) { + *pw_entrylo1 = entry; + } else { + *pw_entrylo0 = entry; + } + if (get_physical_address(env, &paddr, &prot, vaddr2, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return 0; + } + if (!get_pte(env, vaddr2, leafentry_size, &entry)) { + return 0; + } + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + if (*vaddr & oddpagebit) { + *pw_entrylo0 = entry; + } else { + *pw_entrylo1 = entry; + } + } else { + return 0; + } + return 1; + } else { + *vaddr = entry; + return 2; + } +} + +static bool page_table_walk_refill(CPUMIPSState *env, vaddr address, int rw, + int mmu_idx) +{ + int gdw = (env->CP0_PWSize >> CP0PS_GDW) & 0x3F; + int udw = (env->CP0_PWSize >> CP0PS_UDW) & 0x3F; + int mdw = (env->CP0_PWSize >> CP0PS_MDW) & 0x3F; + int ptw = (env->CP0_PWSize >> CP0PS_PTW) & 0x3F; + int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F; + + /* Initial values */ + bool huge_page = false; + bool hgpg_bdhit = false; + bool hgpg_gdhit = false; + bool hgpg_udhit = false; + bool hgpg_mdhit = false; + + int32_t pw_pagemask = 0; + target_ulong pw_entryhi = 0; + uint64_t pw_entrylo0 = 0; + uint64_t pw_entrylo1 = 0; + + /* Native pointer size */ + /*For the 32-bit architectures, this bit is fixed to 0.*/ + int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3; + + /* Indices from PWField */ + int pf_gdw = (env->CP0_PWField >> CP0PF_GDW) & 0x3F; + int pf_udw = (env->CP0_PWField >> CP0PF_UDW) & 0x3F; + int pf_mdw = (env->CP0_PWField >> CP0PF_MDW) & 0x3F; + int pf_ptw = (env->CP0_PWField >> CP0PF_PTW) & 0x3F; + int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + + /* Indices computed from faulting address */ + int gindex = (address >> pf_gdw) & ((1 << gdw) - 1); + int uindex = (address >> pf_udw) & ((1 << udw) - 1); + int mindex = (address >> pf_mdw) & ((1 << mdw) - 1); + int ptindex = (address >> pf_ptw) & ((1 << ptw) - 1); + + /* Other HTW configs */ + int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1; + + /* HTW Shift values (depend on entry size) */ + int directory_shift = (ptew > 1) ? -1 : + (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift; + int leaf_shift = (ptew > 1) ? -1 : + (ptew == 1) ? 
native_shift + 1 : native_shift; + + /* Offsets into tables */ + int goffset = gindex << directory_shift; + int uoffset = uindex << directory_shift; + int moffset = mindex << directory_shift; + int ptoffset0 = (ptindex >> 1) << (leaf_shift + 1); + int ptoffset1 = ptoffset0 | (1 << (leaf_shift)); + + uint32_t leafentry_size = 1 << (leaf_shift + 3); + + /* Starting address - Page Table Base */ + uint64_t vaddr = env->CP0_PWBase; + + uint64_t dir_entry; + uint64_t paddr; + int prot; + int m; + + if (!(env->CP0_Config3 & (1 << CP0C3_PW))) { + /* walker is unimplemented */ + return false; + } + if (!(env->CP0_PWCtl & (1 << CP0PC_PWEN))) { + /* walker is disabled */ + return false; + } + if (!(gdw > 0 || udw > 0 || mdw > 0)) { + /* no structure to walk */ + return false; + } + if ((directory_shift == -1) || (leaf_shift == -1)) { + return false; + } + + /* Global Directory */ + if (gdw > 0) { + vaddr |= goffset; + switch (walk_directory(env, &vaddr, pf_gdw, &huge_page, &hgpg_gdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Upper directory */ + if (udw > 0) { + vaddr |= uoffset; + switch (walk_directory(env, &vaddr, pf_udw, &huge_page, &hgpg_udhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Middle directory */ + if (mdw > 0) { + vaddr |= moffset; + switch (walk_directory(env, &vaddr, pf_mdw, &huge_page, &hgpg_mdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Leaf Level Page Table - First half of PTE pair */ + vaddr |= ptoffset0; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo0 = dir_entry; + + /* Leaf Level Page Table - Second half of PTE pair */ + vaddr |= ptoffset1; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo1 = dir_entry; + +refill: + + m = (1 << pf_ptw) - 1; + + if (huge_page) { + switch (hgpg_bdhit << 3 | hgpg_gdhit << 2 | hgpg_udhit << 1 | + hgpg_mdhit) + { + case 4: + m = (1 << pf_gdw) - 1; + if (pf_gdw & 1) { + m >>= 1; + } + break; + case 2: + m = (1 << pf_udw) - 1; + if (pf_udw & 1) { + m >>= 1; + } + break; + case 1: + m = (1 << pf_mdw) - 1; + if (pf_mdw & 1) { + m >>= 1; + } + break; + } + } + pw_pagemask = m >> 12; + update_pagemask(env, pw_pagemask << 13, &pw_pagemask); + pw_entryhi = (address & ~0x1fff) | (env->CP0_EntryHi & 0xFF); + { + target_ulong tmp_entryhi = env->CP0_EntryHi; + int32_t tmp_pagemask = env->CP0_PageMask; + uint64_t tmp_entrylo0 = env->CP0_EntryLo0; + uint64_t tmp_entrylo1 = env->CP0_EntryLo1; + + env->CP0_EntryHi = pw_entryhi; + env->CP0_PageMask = pw_pagemask; + env->CP0_EntryLo0 = pw_entrylo0; + env->CP0_EntryLo1 = pw_entrylo1; + + /* + * The hardware page walker inserts a page into the TLB in a manner + * identical to a TLBWR instruction as executed by the software refill + * handler. 
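A minimal sketch (plain C, not part of the patch) of how page_table_walk_refill() above derives its directory and PTE indices: each level's index is the faulting address shifted right by that level's CP0_PWField position and masked to its CP0_PWSize width, and levels whose width is zero are skipped (the gdw > 0 / udw > 0 / mdw > 0 checks). The two-level layout and register values below are invented purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* index = (address >> PWField shift) & ((1 << PWSize width) - 1) */
static uint32_t pw_index(uint32_t address, int shift, int width)
{
    return (address >> shift) & ((1u << width) - 1);
}

int main(void)
{
    /* Hypothetical two-level, 4 KiB-page layout; no upper/middle levels */
    int pf_gdw = 22, gdw = 10;      /* global directory: shift and width */
    int pf_ptw = 12, ptw = 10;      /* page table level                  */
    uint32_t address = 0x12345678;  /* faulting virtual address          */

    printf("gindex=%u ptindex=%u\n",
           pw_index(address, pf_gdw, gdw),
           pw_index(address, pf_ptw, ptw));
    return 0;
}

The index is then scaled by the directory or leaf entry size (the directory_shift/leaf_shift computed above) to form the offset ORed into the walk address at each level.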
+ */ + r4k_helper_tlbwr(env); + + env->CP0_EntryHi = tmp_entryhi; + env->CP0_PageMask = tmp_pagemask; + env->CP0_EntryLo0 = tmp_entrylo0; + env->CP0_EntryLo1 = tmp_entrylo1; + } + return true; +} +#endif +#endif + int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, int mmu_idx) { @@ -558,8 +894,7 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, /* data access */ #if !defined(CONFIG_USER_ONLY) - /* XXX: put correct access by using cpu_restore_state() - correctly */ + /* XXX: put correct access by using cpu_restore_state() correctly */ access_type = ACCESS_INT; ret = get_physical_address(env, &physical, &prot, address, rw, access_type, mmu_idx); @@ -583,6 +918,32 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, } else if (ret < 0) #endif { +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + if ((ret == TLBRET_NOMATCH) && (env->tlb->nb_tlb > 1)) { + /* + * Memory reads during hardware page table walking are performed + * as if they were kernel-mode load instructions. + */ + int mode = (env->hflags & MIPS_HFLAG_KSU); + bool ret_walker; + env->hflags &= ~MIPS_HFLAG_KSU; + ret_walker = page_table_walk_refill(env, address, rw, mmu_idx); + env->hflags |= mode; + if (ret_walker) { + ret = get_physical_address(env, &physical, &prot, + address, rw, access_type, mmu_idx); + if (ret == TLBRET_MATCH) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot | PAGE_EXEC, + mmu_idx, TARGET_PAGE_SIZE); + ret = 0; + return ret; + } + } + } +#endif +#endif raise_mmu_exception(env, address, rw, ret); ret = 1; } diff --git a/target/mips/helper.h b/target/mips/helper.h index b2a780a6f2..c23e4e5d97 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -120,6 +120,8 @@ DEF_HELPER_2(mtc0_pagegrain, void, env, tl) DEF_HELPER_2(mtc0_segctl0, void, env, tl) DEF_HELPER_2(mtc0_segctl1, void, env, tl) DEF_HELPER_2(mtc0_segctl2, void, env, tl) +DEF_HELPER_2(mtc0_pwfield, void, env, tl) +DEF_HELPER_2(mtc0_pwsize, void, env, tl) DEF_HELPER_2(mtc0_wired, void, env, tl) DEF_HELPER_2(mtc0_srsconf0, void, env, tl) DEF_HELPER_2(mtc0_srsconf1, void, env, tl) @@ -127,6 +129,7 @@ DEF_HELPER_2(mtc0_srsconf2, void, env, tl) DEF_HELPER_2(mtc0_srsconf3, void, env, tl) DEF_HELPER_2(mtc0_srsconf4, void, env, tl) DEF_HELPER_2(mtc0_hwrena, void, env, tl) +DEF_HELPER_2(mtc0_pwctl, void, env, tl) DEF_HELPER_2(mtc0_count, void, env, tl) DEF_HELPER_2(mtc0_entryhi, void, env, tl) DEF_HELPER_2(mttc0_entryhi, void, env, tl) diff --git a/target/mips/internal.h b/target/mips/internal.h index e41051f8e6..8b1b2456af 100644 --- a/target/mips/internal.h +++ b/target/mips/internal.h @@ -59,7 +59,7 @@ struct mips_def_t { int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; target_ulong CP0_EBaseWG_rw_bitmask; - int insn_flags; + uint64_t insn_flags; enum mips_mmu_types mmu_type; }; @@ -211,6 +211,7 @@ uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; int ieee_ex_to_mips(int xcpt); +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) { @@ -306,9 +307,9 @@ static inline void compute_hflags(CPUMIPSState *env) { env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 | MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU | - MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 | - MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | MIPS_HFLAG_FRE | - MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); + MIPS_HFLAG_AWRAP | 
MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3 | MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | + MIPS_HFLAG_FRE | MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); if (env->CP0_Status & (1 << CP0St_ERL)) { env->hflags |= MIPS_HFLAG_ERL; } @@ -355,16 +356,29 @@ static inline void compute_hflags(CPUMIPSState *env) (env->CP0_Config5 & (1 << CP0C5_SBRI))) { env->hflags |= MIPS_HFLAG_SBRI; } - if (env->insn_flags & ASE_DSPR2) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASER2, - so enable to access DSPR2 resources. */ + if (env->insn_flags & ASE_DSP_R3) { + /* + * Our cpu supports DSP R3 ASE, so enable + * access to DSP R3 resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { - env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2; + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3; + } + } else if (env->insn_flags & ASE_DSP_R2) { + /* + * Our cpu supports DSP R2 ASE, so enable + * access to DSP R2 resources. + */ + if (env->CP0_Status & (1 << CP0St_MX)) { + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2; } } else if (env->insn_flags & ASE_DSP) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASE, - so enable to access DSP resources. */ + /* + * Our cpu supports DSP ASE, so enable + * access to DSP resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { env->hflags |= MIPS_HFLAG_DSP; } diff --git a/target/mips/machine.c b/target/mips/machine.c index 5ba78acd6d..70a8909b90 100644 --- a/target/mips/machine.c +++ b/target/mips/machine.c @@ -212,8 +212,8 @@ const VMStateDescription vmstate_tlb = { const VMStateDescription vmstate_mips_cpu = { .name = "cpu", - .version_id = 11, - .minimum_version_id = 11, + .version_id = 15, + .minimum_version_id = 15, .post_load = cpu_post_load, .fields = (VMStateField[]) { /* Active TC */ @@ -256,7 +256,11 @@ const VMStateDescription vmstate_mips_cpu = { VMSTATE_UINTTL(env.CP0_SegCtl0, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl1, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl2, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWBase, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWField, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWSize, MIPSCPU), VMSTATE_INT32(env.CP0_Wired, MIPSCPU), + VMSTATE_INT32(env.CP0_PWCtl, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf0, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf1, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf2, MIPSCPU), diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h index c8e99791ad..dbdb4b2b2d 100644 --- a/target/mips/mips-defs.h +++ b/target/mips/mips-defs.h @@ -22,40 +22,54 @@ #endif #endif -/* Masks used to mark instructions to indicate which ISA level they - were introduced in. */ -#define ISA_MIPS1 0x00000001 -#define ISA_MIPS2 0x00000002 -#define ISA_MIPS3 0x00000004 -#define ISA_MIPS4 0x00000008 -#define ISA_MIPS5 0x00000010 -#define ISA_MIPS32 0x00000020 -#define ISA_MIPS32R2 0x00000040 -#define ISA_MIPS64 0x00000080 -#define ISA_MIPS64R2 0x00000100 -#define ISA_MIPS32R3 0x00000200 -#define ISA_MIPS64R3 0x00000400 -#define ISA_MIPS32R5 0x00000800 -#define ISA_MIPS64R5 0x00001000 -#define ISA_MIPS32R6 0x00002000 -#define ISA_MIPS64R6 0x00004000 -#define ISA_NANOMIPS32 0x00008000 - -/* MIPS ASEs. */ -#define ASE_MIPS16 0x00010000 -#define ASE_MIPS3D 0x00020000 -#define ASE_MDMX 0x00040000 -#define ASE_DSP 0x00080000 -#define ASE_DSPR2 0x00100000 -#define ASE_MT 0x00200000 -#define ASE_SMARTMIPS 0x00400000 -#define ASE_MICROMIPS 0x00800000 -#define ASE_MSA 0x01000000 - -/* Chip specific instructions. 
*/ -#define INSN_LOONGSON2E 0x20000000 -#define INSN_LOONGSON2F 0x40000000 -#define INSN_VR54XX 0x80000000 +/* + * bit definitions for insn_flags (ISAs/ASEs flags) + * ------------------------------------------------ + */ +/* + * bits 0-31: MIPS base instruction sets + */ +#define ISA_MIPS1 0x0000000000000001ULL +#define ISA_MIPS2 0x0000000000000002ULL +#define ISA_MIPS3 0x0000000000000004ULL +#define ISA_MIPS4 0x0000000000000008ULL +#define ISA_MIPS5 0x0000000000000010ULL +#define ISA_MIPS32 0x0000000000000020ULL +#define ISA_MIPS32R2 0x0000000000000040ULL +#define ISA_MIPS64 0x0000000000000080ULL +#define ISA_MIPS64R2 0x0000000000000100ULL +#define ISA_MIPS32R3 0x0000000000000200ULL +#define ISA_MIPS64R3 0x0000000000000400ULL +#define ISA_MIPS32R5 0x0000000000000800ULL +#define ISA_MIPS64R5 0x0000000000001000ULL +#define ISA_MIPS32R6 0x0000000000002000ULL +#define ISA_MIPS64R6 0x0000000000004000ULL +#define ISA_NANOMIPS32 0x0000000000008000ULL +/* + * bits 32-47: MIPS ASEs + */ +#define ASE_MIPS16 0x0000000100000000ULL +#define ASE_MIPS3D 0x0000000200000000ULL +#define ASE_MDMX 0x0000000400000000ULL +#define ASE_DSP 0x0000000800000000ULL +#define ASE_DSP_R2 0x0000001000000000ULL +#define ASE_DSP_R3 0x0000002000000000ULL +#define ASE_MT 0x0000004000000000ULL +#define ASE_SMARTMIPS 0x0000008000000000ULL +#define ASE_MICROMIPS 0x0000010000000000ULL +#define ASE_MSA 0x0000020000000000ULL +/* + * bits 48-55: vendor-specific base instruction sets + */ +#define INSN_LOONGSON2E 0x0001000000000000ULL +#define INSN_LOONGSON2F 0x0002000000000000ULL +#define INSN_VR54XX 0x0004000000000000ULL +#define INSN_R5900 0x0008000000000000ULL +/* + * bits 56-63: vendor-specific ASEs + */ +#define ASE_MMI 0x0100000000000000ULL +#define ASE_MXU 0x0200000000000000ULL /* MIPS CPU defines. 
*/ #define CPU_MIPS1 (ISA_MIPS1) @@ -63,6 +77,7 @@ #define CPU_MIPS3 (CPU_MIPS2 | ISA_MIPS3) #define CPU_MIPS4 (CPU_MIPS3 | ISA_MIPS4) #define CPU_VR54XX (CPU_MIPS4 | INSN_VR54XX) +#define CPU_R5900 (CPU_MIPS3 | INSN_R5900) #define CPU_LOONGSON2E (CPU_MIPS3 | INSN_LOONGSON2E) #define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F) diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c index c148b310cd..d1f1d1aa35 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -1400,7 +1400,7 @@ void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1) env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF); } -void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || @@ -1411,6 +1411,11 @@ void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) } } +void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +{ + update_pagemask(env, arg1, &env->CP0_PageMask); +} + void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1) { /* SmartMIPS not implemented */ @@ -1445,6 +1450,77 @@ void helper_mtc0_segctl2(CPUMIPSState *env, target_ulong arg1) tlb_flush(cs); } +void helper_mtc0_pwfield(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + uint64_t mask = 0x3F3FFFFFFFULL; + uint32_t old_ptei = (env->CP0_PWField >> CP0PF_PTEI) & 0x3FULL; + uint32_t new_ptei = (arg1 >> CP0PF_PTEI) & 0x3FULL; + + if ((env->insn_flags & ISA_MIPS32R6)) { + if (((arg1 >> CP0PF_BDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_BDI); + } + if (((arg1 >> CP0PF_GDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_GDI); + } + if (((arg1 >> CP0PF_UDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_UDI); + } + if (((arg1 >> CP0PF_MDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_MDI); + } + if (((arg1 >> CP0PF_PTI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_PTI); + } + } + env->CP0_PWField = arg1 & mask; + + if ((new_ptei >= 32) || + ((env->insn_flags & ISA_MIPS32R6) && + (new_ptei == 0 || new_ptei == 1))) { + env->CP0_PWField = (env->CP0_PWField & ~0x3FULL) | + (old_ptei << CP0PF_PTEI); + } +#else + uint32_t mask = 0x3FFFFFFF; + uint32_t old_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + uint32_t new_ptew = (arg1 >> CP0PF_PTEW) & 0x3F; + + if ((env->insn_flags & ISA_MIPS32R6)) { + if (((arg1 >> CP0PF_GDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_GDW); + } + if (((arg1 >> CP0PF_UDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_UDW); + } + if (((arg1 >> CP0PF_MDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_MDW); + } + if (((arg1 >> CP0PF_PTW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_PTW); + } + } + env->CP0_PWField = arg1 & mask; + + if ((new_ptew >= 32) || + ((env->insn_flags & ISA_MIPS32R6) && + (new_ptew == 0 || new_ptew == 1))) { + env->CP0_PWField = (env->CP0_PWField & ~0x3F) | + (old_ptew << CP0PF_PTEW); + } +#endif +} + +void helper_mtc0_pwsize(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + env->CP0_PWSize = arg1 & 0x3F7FFFFFFFULL; +#else + env->CP0_PWSize = arg1 & 0x3FFFFFFF; +#endif +} + void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1) { if (env->insn_flags & ISA_MIPS32R6) { @@ -1456,6 +1532,16 @@ void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1) } } +void helper_mtc0_pwctl(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + /* PWEn = 0. 
Hardware page table walking is not implemented. */ + env->CP0_PWCtl = (env->CP0_PWCtl & 0x000000C0) | (arg1 & 0x5C00003F); +#else + env->CP0_PWCtl = (arg1 & 0x800000FF); +#endif +} + void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1) { env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask; diff --git a/target/mips/translate.c b/target/mips/translate.c index ab16cdb911..60320cbe69 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -1,5 +1,5 @@ /* - * MIPS32 emulation for qemu: main translation routines. + * MIPS emulation for QEMU - main translation routines * * Copyright (c) 2004-2005 Jocelyn Mayer * Copyright (c) 2006 Marius Groeger (FPU operations) @@ -463,8 +463,10 @@ enum { OPC_WSBH = (0x02 << 6) | OPC_BSHFL, OPC_SEB = (0x10 << 6) | OPC_BSHFL, OPC_SEH = (0x18 << 6) | OPC_BSHFL, - OPC_ALIGN = (0x08 << 6) | OPC_BSHFL, /* 010.bp */ - OPC_ALIGN_END = (0x0B << 6) | OPC_BSHFL, /* 010.00 to 010.11 */ + OPC_ALIGN = (0x08 << 6) | OPC_BSHFL, /* 010.bp (010.00 to 010.11) */ + OPC_ALIGN_1 = (0x09 << 6) | OPC_BSHFL, + OPC_ALIGN_2 = (0x0A << 6) | OPC_BSHFL, + OPC_ALIGN_3 = (0x0B << 6) | OPC_BSHFL, OPC_BITSWAP = (0x00 << 6) | OPC_BSHFL /* 00000 */ }; @@ -474,8 +476,14 @@ enum { enum { OPC_DSBH = (0x02 << 6) | OPC_DBSHFL, OPC_DSHD = (0x05 << 6) | OPC_DBSHFL, - OPC_DALIGN = (0x08 << 6) | OPC_DBSHFL, /* 01.bp */ - OPC_DALIGN_END = (0x0F << 6) | OPC_DBSHFL, /* 01.000 to 01.111 */ + OPC_DALIGN = (0x08 << 6) | OPC_DBSHFL, /* 01.bp (01.000 to 01.111) */ + OPC_DALIGN_1 = (0x09 << 6) | OPC_DBSHFL, + OPC_DALIGN_2 = (0x0A << 6) | OPC_DBSHFL, + OPC_DALIGN_3 = (0x0B << 6) | OPC_DBSHFL, + OPC_DALIGN_4 = (0x0C << 6) | OPC_DBSHFL, + OPC_DALIGN_5 = (0x0D << 6) | OPC_DBSHFL, + OPC_DALIGN_6 = (0x0E << 6) | OPC_DBSHFL, + OPC_DALIGN_7 = (0x0F << 6) | OPC_DBSHFL, OPC_DBITSWAP = (0x00 << 6) | OPC_DBSHFL, /* 00000 */ }; @@ -1389,6 +1397,1021 @@ enum { OPC_BINSRI_df = (0x7 << 23) | OPC_MSA_BIT_09, }; + +/* + * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET + * ============================================ + * + * MXU (full name: MIPS eXtension/enhanced Unit) is an SIMD extension of MIPS32 + * instructions set. It is designed to fit the needs of signal, graphical and + * video processing applications. MXU instruction set is used in Xburst family + * of microprocessors by Ingenic. + * + * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is + * the control register. 
+ * + * The notation used in MXU assembler mnemonics + * -------------------------------------------- + * + * Registers: + * + * XRa, XRb, XRc, XRd - MXU registers + * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers + * + * Subfields: + * + * aptn1 - 1-bit accumulate add/subtract pattern + * aptn2 - 2-bit accumulate add/subtract pattern + * eptn2 - 2-bit execute add/subtract pattern + * optn2 - 2-bit operand pattern + * optn3 - 3-bit operand pattern + * sft4 - 4-bit shift amount + * strd2 - 2-bit stride amount + * + * Prefixes: + * + * <Operation parallel level><Operand size> + * S 32 + * D 16 + * Q 8 + * + * Suffixes: + * + * E - Expand results + * F - Fixed point multiplication + * L - Low part result + * R - Doing rounding + * V - Variable instead of immediate + * W - Combine above L and V + * + * Operations: + * + * ADD - Add or subtract + * ADDC - Add with carry-in + * ACC - Accumulate + * ASUM - Sum together then accumulate (add or subtract) + * ASUMC - Sum together then accumulate (add or subtract) with carry-in + * AVG - Average between 2 operands + * ABD - Absolute difference + * ALN - Align data + * AND - Logical bitwise 'and' operation + * CPS - Copy sign + * EXTR - Extract bits + * I2M - Move from GPR register to MXU register + * LDD - Load data from memory to XRF + * LDI - Load data from memory to XRF (and increase the address base) + * LUI - Load unsigned immediate + * MUL - Multiply + * MULU - Unsigned multiply + * MADD - 64-bit operand add 32x32 product + * MSUB - 64-bit operand subtract 32x32 product + * MAC - Multiply and accumulate (add or subtract) + * MAD - Multiply and add or subtract + * MAX - Maximum between 2 operands + * MIN - Minimum between 2 operands + * M2I - Move from MXU register to GPR register + * MOVZ - Move if zero + * MOVN - Move if non-zero + * NOR - Logical bitwise 'nor' operation + * OR - Logical bitwise 'or' operation + * STD - Store data from XRF to memory + * SDI - Store data from XRF to memory (and increase the address base) + * SLT - Set of less than comparison + * SAD - Sum of absolute differences + * SLL - Logical shift left + * SLR - Logical shift right + * SAR - Arithmetic shift right + * SAT - Saturation + * SFL - Shuffle + * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0) + * XOR - Logical bitwise 'exclusive or' operation + * + * Load/Store instructions Multiplication instructions + * ----------------------- --------------------------- + * + * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt + * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt + * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt + * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt + * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt + * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt + * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 + * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 + * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 + * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 + * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 + * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 + * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd + * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd + * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 + * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 + * S16LDI XRa, Rb, 
s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 + * S16SDI XRa, Rb, s10, eptn2 + * S8LDD XRa, Rb, s8, eptn3 + * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions + * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- + * S8SDI XRa, Rb, s8, eptn3 + * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 + * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd + * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 + * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 + * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 + * S32CPS XRa, XRb, XRc + * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 + * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 + * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 + * D16ASUM XRa, XRb, XRc, XRd, eptn2 + * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, + * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc + * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc + * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 + * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 + * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 + * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc + * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd + * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc + * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc + * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd + * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd + * Q8SLT XRa, XRb, XRc + * Q8SLTU XRa, XRb, XRc + * Q8MOVZ XRa, XRb, XRc Shift instructions + * Q8MOVN XRa, XRb, XRc ------------------ + * + * D32SLL XRa, XRb, XRc, XRd, sft4 + * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 + * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 + * D32SARL XRa, XRb, XRc, sft4 + * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb + * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb + * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb + * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb + * Q16SLL XRa, XRb, XRc, XRd, sft4 + * Q16SLR XRa, XRb, XRc, XRd, sft4 + * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4 + * ------------------------- Q16SLLV XRa, XRb, Rb + * Q16SLRV XRa, XRb, Rb + * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb + * S32ALN XRa, XRb, XRc, Rb + * S32ALNI XRa, XRb, XRc, s3 + * S32LUI XRa, s8, optn3 Move instructions + * S32EXTR XRa, XRb, Rb, bits5 ----------------- + * S32EXTRV XRa, XRb, Rs, Rt + * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb + * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb + * + * + * bits + * 05..00 + * + * ┌─ 000000 ─ OPC_MXU_S32MADD + * ├─ 000001 ─ OPC_MXU_S32MADDU + * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL) + * │ + * │ 20..18 + * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX + * │ ├─ 001 ─ OPC_MXU_S32MIN + * │ ├─ 010 ─ OPC_MXU_D16MAX + * │ ├─ 011 ─ OPC_MXU_D16MIN + * │ ├─ 100 ─ OPC_MXU_Q8MAX + * │ ├─ 101 ─ OPC_MXU_Q8MIN + * │ ├─ 110 ─ OPC_MXU_Q8SLT + * │ └─ 111 ─ OPC_MXU_Q8SLTU + * ├─ 000100 ─ OPC_MXU_S32MSUB + * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18 + * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT + * │ ├─ 001 ─ OPC_MXU_D16SLT + * │ ├─ 010 ─ OPC_MXU_D16AVG + * │ ├─ 011 ─ OPC_MXU_D16AVGR + * │ ├─ 100 ─ OPC_MXU_Q8AVG + * │ ├─ 101 ─ OPC_MXU_Q8AVGR + * │ └─ 111 ─ OPC_MXU_Q8ADD + * │ + * │ 20..18 + * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS + * │ ├─ 010 ─ OPC_MXU_D16CPS + * │ ├─ 100 ─ OPC_MXU_Q8ABD + * │ └─ 110 ─ OPC_MXU_Q16SAT + * ├─ 001000 ─ OPC_MXU_D16MUL + * │ 25..24 + * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF + * │ └─ 01 ─ OPC_MXU_D16MULE + * ├─ 001010 ─ OPC_MXU_D16MAC + * ├─ 001011 ─ OPC_MXU_D16MACF + * ├─ 001100 ─ 
OPC_MXU_D16MADL + * ├─ 001101 ─ OPC_MXU_S16MAD + * ├─ 001110 ─ OPC_MXU_Q16ADD + * ├─ 001111 ─ OPC_MXU_D16MACE 23 + * │ ┌─ 0 ─ OPC_MXU_S32LDD + * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR + * │ + * │ 23 + * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD + * │ └─ 1 ─ OPC_MXU_S32STDR + * │ + * │ 13..10 + * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV + * │ └─ 0001 ─ OPC_MXU_S32LDDVR + * │ + * │ 13..10 + * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV + * │ └─ 0001 ─ OPC_MXU_S32STDVR + * │ + * │ 23 + * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI + * │ └─ 1 ─ OPC_MXU_S32LDIR + * │ + * │ 23 + * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI + * │ └─ 1 ─ OPC_MXU_S32SDIR + * │ + * │ 13..10 + * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV + * │ └─ 0001 ─ OPC_MXU_S32LDIVR + * │ + * │ 13..10 + * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV + * │ └─ 0001 ─ OPC_MXU_S32SDIVR + * ├─ 011000 ─ OPC_MXU_D32ADD + * │ 23..22 + * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC + * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM + * │ └─ 10 ─ OPC_MXU_D32ASUM + * ├─ 011010 ─ <not assigned> + * │ 23..22 + * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC + * │ ├─ 01 ─ OPC_MXU_Q16ACCM + * │ └─ 10 ─ OPC_MXU_Q16ASUM + * │ + * │ 23..22 + * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE + * │ ├─ 01 ─ OPC_MXU_D8SUM + * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC + * ├─ 011110 ─ <not assigned> + * ├─ 011111 ─ <not assigned> + * ├─ 100000 ─ <not assigned> (overlaps with CLZ) + * ├─ 100001 ─ <not assigned> (overlaps with CLO) + * ├─ 100010 ─ OPC_MXU_S8LDD + * ├─ 100011 ─ OPC_MXU_S8STD 15..14 + * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL + * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 00 ─ OPC_MXU_S32MULU + * │ ├─ 00 ─ OPC_MXU_S32EXTR + * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV + * │ + * │ 20..18 + * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW + * │ ├─ 001 ─ OPC_MXU_S32ALN + * ├─ 101000 ─ OPC_MXU_LXB ├─ 010 ─ OPC_MXU_S32ALNI + * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_S32NOR + * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_S32AND + * ├─ 101011 ─ OPC_MXU_S16STD ├─ 101 ─ OPC_MXU_S32OR + * ├─ 101100 ─ OPC_MXU_S16LDI ├─ 110 ─ OPC_MXU_S32XOR + * ├─ 101101 ─ OPC_MXU_S16SDI └─ 111 ─ OPC_MXU_S32LUI + * ├─ 101110 ─ OPC_MXU_S32M2I + * ├─ 101111 ─ OPC_MXU_S32I2M + * ├─ 110000 ─ OPC_MXU_D32SLL + * ├─ 110001 ─ OPC_MXU_D32SLR 20..18 + * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV + * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV + * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 010 ─ OPC_MXU_D32SARV + * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 011 ─ OPC_MXU_Q16SLLV + * │ ├─ 100 ─ OPC_MXU_Q16SLRV + * ├─ 110110 ─ OPC_MXU__POOL17 ─┴─ 101 ─ OPC_MXU_Q16SARV + * │ + * ├─ 110111 ─ OPC_MXU_Q16SAR + * │ 23..22 + * ├─ 111000 ─ OPC_MXU__POOL18 ─┬─ 00 ─ OPC_MXU_Q8MUL + * │ └─ 01 ─ OPC_MXU_Q8MULSU + * │ + * │ 20..18 + * ├─ 111001 ─ OPC_MXU__POOL19 ─┬─ 000 ─ OPC_MXU_Q8MOVZ + * │ ├─ 001 ─ OPC_MXU_Q8MOVN + * │ ├─ 010 ─ OPC_MXU_D16MOVZ + * │ ├─ 011 ─ OPC_MXU_D16MOVN + * │ ├─ 100 ─ OPC_MXU_S32MOVZ + * │ └─ 101 ─ OPC_MXU_S32MOV + * │ + * │ 23..22 + * ├─ 111010 ─ OPC_MXU__POOL20 ─┬─ 00 ─ OPC_MXU_Q8MAC + * │ └─ 10 ─ OPC_MXU_Q8MACSU + * ├─ 111011 ─ OPC_MXU_Q16SCOP + * ├─ 111100 ─ OPC_MXU_Q8MADL + * ├─ 111101 ─ OPC_MXU_S32SFL + * ├─ 111110 ─ OPC_MXU_Q8SAD + * └─ 111111 ─ <not assigned> (overlaps with SDBBP) + * + * + * Compiled after: + * + * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit + * Programming Manual", Ingenic Semiconductor Co, Ltd., 2017 + */ + 
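The tree above maps onto a two-level decode: instruction bits 5..0 select either a concrete MXU instruction or one of the pools, and each pool is then resolved from the sub-field noted beside it in the tree (bits 20..18, 25..24, 23..22, 23, 15..14 or 13..10, depending on the pool). As a minimal, self-contained illustration of that structure (standalone C outside QEMU, with a simplified extract32() and a hypothetical decode_mxu_pool00() helper that is not part of this patch), the sketch below decodes a pool-00 instruction word down to its minor opcode:

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in for QEMU's extract32(): return 'length' bits of
 * 'value' starting at bit 'start' (valid for 1 <= length <= 32).
 */
static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0U >> (32 - length));
}

/* Major opcode field, instruction bits 5..0 (see the tree above). */
enum {
    OPC_MXU__POOL00 = 0x03,
};

/* MXU pool 00 minor opcodes, selected by instruction bits 20..18. */
enum {
    OPC_MXU_S32MAX = 0x00,
    OPC_MXU_S32MIN = 0x01,
    OPC_MXU_D16MAX = 0x02,
    OPC_MXU_D16MIN = 0x03,
    OPC_MXU_Q8MAX  = 0x04,
    OPC_MXU_Q8MIN  = 0x05,
    OPC_MXU_Q8SLT  = 0x06,
    OPC_MXU_Q8SLTU = 0x07,
};

/* Hypothetical helper: return the pool-00 minor opcode, or -1 if the
 * instruction word does not belong to pool 00. */
static int decode_mxu_pool00(uint32_t insn)
{
    if (extract32(insn, 0, 6) != OPC_MXU__POOL00) {
        return -1;
    }
    return extract32(insn, 18, 3);   /* 000 = S32MAX ... 111 = Q8SLTU */
}

int main(void)
{
    /* Bits 5..0 = 000011 (pool 00), bits 20..18 = 100 (Q8MAX). */
    uint32_t insn = (0x4u << 18) | 0x03u;
    printf("minor = %d (Q8MAX = %d)\n", decode_mxu_pool00(insn), OPC_MXU_Q8MAX);
    return 0;
}

The per-pool enums added below mirror this layout, one enum per pool, with the pool members numbered by the value of the pool's sub-field.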
+enum { + OPC_MXU_S32MADD = 0x00, + OPC_MXU_S32MADDU = 0x01, + OPC__MXU_MUL = 0x02, + OPC_MXU__POOL00 = 0x03, + OPC_MXU_S32MSUB = 0x04, + OPC_MXU_S32MSUBU = 0x05, + OPC_MXU__POOL01 = 0x06, + OPC_MXU__POOL02 = 0x07, + OPC_MXU_D16MUL = 0x08, + OPC_MXU__POOL03 = 0x09, + OPC_MXU_D16MAC = 0x0A, + OPC_MXU_D16MACF = 0x0B, + OPC_MXU_D16MADL = 0x0C, + OPC_MXU_S16MAD = 0x0D, + OPC_MXU_Q16ADD = 0x0E, + OPC_MXU_D16MACE = 0x0F, + OPC_MXU__POOL04 = 0x10, + OPC_MXU__POOL05 = 0x11, + OPC_MXU__POOL06 = 0x12, + OPC_MXU__POOL07 = 0x13, + OPC_MXU__POOL08 = 0x14, + OPC_MXU__POOL09 = 0x15, + OPC_MXU__POOL10 = 0x16, + OPC_MXU__POOL11 = 0x17, + OPC_MXU_D32ADD = 0x18, + OPC_MXU__POOL12 = 0x19, + /* not assigned 0x1A */ + OPC_MXU__POOL13 = 0x1B, + OPC_MXU__POOL14 = 0x1C, + OPC_MXU_Q8ACCE = 0x1D, + /* not assigned 0x1E */ + /* not assigned 0x1F */ + /* not assigned 0x20 */ + /* not assigned 0x21 */ + OPC_MXU_S8LDD = 0x22, + OPC_MXU_S8STD = 0x23, + OPC_MXU_S8LDI = 0x24, + OPC_MXU_S8SDI = 0x25, + OPC_MXU__POOL15 = 0x26, + OPC_MXU__POOL16 = 0x27, + OPC_MXU_LXB = 0x28, + /* not assigned 0x29 */ + OPC_MXU_S16LDD = 0x2A, + OPC_MXU_S16STD = 0x2B, + OPC_MXU_S16LDI = 0x2C, + OPC_MXU_S16SDI = 0x2D, + OPC_MXU_S32M2I = 0x2E, + OPC_MXU_S32I2M = 0x2F, + OPC_MXU_D32SLL = 0x30, + OPC_MXU_D32SLR = 0x31, + OPC_MXU_D32SARL = 0x32, + OPC_MXU_D32SAR = 0x33, + OPC_MXU_Q16SLL = 0x34, + OPC_MXU_Q16SLR = 0x35, + OPC_MXU__POOL17 = 0x36, + OPC_MXU_Q16SAR = 0x37, + OPC_MXU__POOL18 = 0x38, + OPC_MXU__POOL19 = 0x39, + OPC_MXU__POOL20 = 0x3A, + OPC_MXU_Q16SCOP = 0x3B, + OPC_MXU_Q8MADL = 0x3C, + OPC_MXU_S32SFL = 0x3D, + OPC_MXU_Q8SAD = 0x3E, + /* not assigned 0x3F */ +}; + + +/* + * MXU pool 00 + */ +enum { + OPC_MXU_S32MAX = 0x00, + OPC_MXU_S32MIN = 0x01, + OPC_MXU_D16MAX = 0x02, + OPC_MXU_D16MIN = 0x03, + OPC_MXU_Q8MAX = 0x04, + OPC_MXU_Q8MIN = 0x05, + OPC_MXU_Q8SLT = 0x06, + OPC_MXU_Q8SLTU = 0x07, +}; + +/* + * MXU pool 01 + */ +enum { + OPC_MXU_S32SLT = 0x00, + OPC_MXU_D16SLT = 0x01, + OPC_MXU_D16AVG = 0x02, + OPC_MXU_D16AVGR = 0x03, + OPC_MXU_Q8AVG = 0x04, + OPC_MXU_Q8AVGR = 0x05, + OPC_MXU_Q8ADD = 0x07, +}; + +/* + * MXU pool 02 + */ +enum { + OPC_MXU_S32CPS = 0x00, + OPC_MXU_D16CPS = 0x02, + OPC_MXU_Q8ABD = 0x04, + OPC_MXU_Q16SAT = 0x06, +}; + +/* + * MXU pool 03 + */ +enum { + OPC_MXU_D16MULF = 0x00, + OPC_MXU_D16MULE = 0x01, +}; + +/* + * MXU pool 04 + */ +enum { + OPC_MXU_S32LDD = 0x00, + OPC_MXU_S32LDDR = 0x01, +}; + +/* + * MXU pool 05 + */ +enum { + OPC_MXU_S32STD = 0x00, + OPC_MXU_S32STDR = 0x01, +}; + +/* + * MXU pool 06 + */ +enum { + OPC_MXU_S32LDDV = 0x00, + OPC_MXU_S32LDDVR = 0x01, +}; + +/* + * MXU pool 07 + */ +enum { + OPC_MXU_S32STDV = 0x00, + OPC_MXU_S32STDVR = 0x01, +}; + +/* + * MXU pool 08 + */ +enum { + OPC_MXU_S32LDI = 0x00, + OPC_MXU_S32LDIR = 0x01, +}; + +/* + * MXU pool 09 + */ +enum { + OPC_MXU_S32SDI = 0x00, + OPC_MXU_S32SDIR = 0x01, +}; + +/* + * MXU pool 10 + */ +enum { + OPC_MXU_S32LDIV = 0x00, + OPC_MXU_S32LDIVR = 0x01, +}; + +/* + * MXU pool 11 + */ +enum { + OPC_MXU_S32SDIV = 0x00, + OPC_MXU_S32SDIVR = 0x01, +}; + +/* + * MXU pool 12 + */ +enum { + OPC_MXU_D32ACC = 0x00, + OPC_MXU_D32ACCM = 0x01, + OPC_MXU_D32ASUM = 0x02, +}; + +/* + * MXU pool 13 + */ +enum { + OPC_MXU_Q16ACC = 0x00, + OPC_MXU_Q16ACCM = 0x01, + OPC_MXU_Q16ASUM = 0x02, +}; + +/* + * MXU pool 14 + */ +enum { + OPC_MXU_Q8ADDE = 0x00, + OPC_MXU_D8SUM = 0x01, + OPC_MXU_D8SUMC = 0x02, +}; + +/* + * MXU pool 15 + */ +enum { + OPC_MXU_S32MUL = 0x00, + OPC_MXU_S32MULU = 0x01, + OPC_MXU_S32EXTR = 0x02, + OPC_MXU_S32EXTRV = 0x03, +}; + +/* + * MXU 
pool 16 + */ +enum { + OPC_MXU_D32SARW = 0x00, + OPC_MXU_S32ALN = 0x01, + OPC_MXU_S32ALNI = 0x02, + OPC_MXU_S32NOR = 0x03, + OPC_MXU_S32AND = 0x04, + OPC_MXU_S32OR = 0x05, + OPC_MXU_S32XOR = 0x06, + OPC_MXU_S32LUI = 0x07, +}; + +/* + * MXU pool 17 + */ +enum { + OPC_MXU_D32SLLV = 0x00, + OPC_MXU_D32SLRV = 0x01, + OPC_MXU_D32SARV = 0x03, + OPC_MXU_Q16SLLV = 0x04, + OPC_MXU_Q16SLRV = 0x05, + OPC_MXU_Q16SARV = 0x07, +}; + +/* + * MXU pool 18 + */ +enum { + OPC_MXU_Q8MUL = 0x00, + OPC_MXU_Q8MULSU = 0x01, +}; + +/* + * MXU pool 19 + */ +enum { + OPC_MXU_Q8MOVZ = 0x00, + OPC_MXU_Q8MOVN = 0x01, + OPC_MXU_D16MOVZ = 0x02, + OPC_MXU_D16MOVN = 0x03, + OPC_MXU_S32MOVZ = 0x04, + OPC_MXU_S32MOVN = 0x05, +}; + +/* + * MXU pool 20 + */ +enum { + OPC_MXU_Q8MAC = 0x00, + OPC_MXU_Q8MACSU = 0x01, +}; + +/* + * Overview of the TX79-specific instruction set + * ============================================= + * + * The R5900 and the C790 have 128-bit wide GPRs, where the upper 64 bits + * are only used by the specific quadword (128-bit) LQ/SQ load/store + * instructions and certain multimedia instructions (MMIs). These MMIs + * configure the 128-bit data path as two 64-bit, four 32-bit, eight 16-bit + * or sixteen 8-bit paths. + * + * Reference: + * + * The Toshiba TX System RISC TX79 Core Architecture manual, + * https://wiki.qemu.org/File:C790.pdf + * + * Three-Operand Multiply and Multiply-Add (4 instructions) + * -------------------------------------------------------- + * MADD [rd,] rs, rt Multiply/Add + * MADDU [rd,] rs, rt Multiply/Add Unsigned + * MULT [rd,] rs, rt Multiply (3-operand) + * MULTU [rd,] rs, rt Multiply Unsigned (3-operand) + * + * Multiply Instructions for Pipeline 1 (10 instructions) + * ------------------------------------------------------ + * MULT1 [rd,] rs, rt Multiply Pipeline 1 + * MULTU1 [rd,] rs, rt Multiply Unsigned Pipeline 1 + * DIV1 rs, rt Divide Pipeline 1 + * DIVU1 rs, rt Divide Unsigned Pipeline 1 + * MADD1 [rd,] rs, rt Multiply-Add Pipeline 1 + * MADDU1 [rd,] rs, rt Multiply-Add Unsigned Pipeline 1 + * MFHI1 rd Move From HI1 Register + * MFLO1 rd Move From LO1 Register + * MTHI1 rs Move To HI1 Register + * MTLO1 rs Move To LO1 Register + * + * Arithmetic (19 instructions) + * ---------------------------- + * PADDB rd, rs, rt Parallel Add Byte + * PSUBB rd, rs, rt Parallel Subtract Byte + * PADDH rd, rs, rt Parallel Add Halfword + * PSUBH rd, rs, rt Parallel Subtract Halfword + * PADDW rd, rs, rt Parallel Add Word + * PSUBW rd, rs, rt Parallel Subtract Word + * PADSBH rd, rs, rt Parallel Add/Subtract Halfword + * PADDSB rd, rs, rt Parallel Add with Signed Saturation Byte + * PSUBSB rd, rs, rt Parallel Subtract with Signed Saturation Byte + * PADDSH rd, rs, rt Parallel Add with Signed Saturation Halfword + * PSUBSH rd, rs, rt Parallel Subtract with Signed Saturation Halfword + * PADDSW rd, rs, rt Parallel Add with Signed Saturation Word + * PSUBSW rd, rs, rt Parallel Subtract with Signed Saturation Word + * PADDUB rd, rs, rt Parallel Add with Unsigned saturation Byte + * PSUBUB rd, rs, rt Parallel Subtract with Unsigned saturation Byte + * PADDUH rd, rs, rt Parallel Add with Unsigned saturation Halfword + * PSUBUH rd, rs, rt Parallel Subtract with Unsigned saturation Halfword + * PADDUW rd, rs, rt Parallel Add with Unsigned saturation Word + * PSUBUW rd, rs, rt Parallel Subtract with Unsigned saturation Word + * + * Min/Max (4 instructions) + * ------------------------ + * PMAXH rd, rs, rt Parallel Maximum Halfword + * PMINH rd, rs, rt Parallel Minimum Halfword + * PMAXW rd, 
rs, rt Parallel Maximum Word + * PMINW rd, rs, rt Parallel Minimum Word + * + * Absolute (2 instructions) + * ------------------------- + * PABSH rd, rt Parallel Absolute Halfword + * PABSW rd, rt Parallel Absolute Word + * + * Logical (4 instructions) + * ------------------------ + * PAND rd, rs, rt Parallel AND + * POR rd, rs, rt Parallel OR + * PXOR rd, rs, rt Parallel XOR + * PNOR rd, rs, rt Parallel NOR + * + * Shift (9 instructions) + * ---------------------- + * PSLLH rd, rt, sa Parallel Shift Left Logical Halfword + * PSRLH rd, rt, sa Parallel Shift Right Logical Halfword + * PSRAH rd, rt, sa Parallel Shift Right Arithmetic Halfword + * PSLLW rd, rt, sa Parallel Shift Left Logical Word + * PSRLW rd, rt, sa Parallel Shift Right Logical Word + * PSRAW rd, rt, sa Parallel Shift Right Arithmetic Word + * PSLLVW rd, rt, rs Parallel Shift Left Logical Variable Word + * PSRLVW rd, rt, rs Parallel Shift Right Logical Variable Word + * PSRAVW rd, rt, rs Parallel Shift Right Arithmetic Variable Word + * + * Compare (6 instructions) + * ------------------------ + * PCGTB rd, rs, rt Parallel Compare for Greater Than Byte + * PCEQB rd, rs, rt Parallel Compare for Equal Byte + * PCGTH rd, rs, rt Parallel Compare for Greater Than Halfword + * PCEQH rd, rs, rt Parallel Compare for Equal Halfword + * PCGTW rd, rs, rt Parallel Compare for Greater Than Word + * PCEQW rd, rs, rt Parallel Compare for Equal Word + * + * LZC (1 instruction) + * ------------------- + * PLZCW rd, rs Parallel Leading Zero or One Count Word + * + * Quadword Load and Store (2 instructions) + * ---------------------------------------- + * LQ rt, offset(base) Load Quadword + * SQ rt, offset(base) Store Quadword + * + * Multiply and Divide (19 instructions) + * ------------------------------------- + * PMULTW rd, rs, rt Parallel Multiply Word + * PMULTUW rd, rs, rt Parallel Multiply Unsigned Word + * PDIVW rs, rt Parallel Divide Word + * PDIVUW rs, rt Parallel Divide Unsigned Word + * PMADDW rd, rs, rt Parallel Multiply-Add Word + * PMADDUW rd, rs, rt Parallel Multiply-Add Unsigned Word + * PMSUBW rd, rs, rt Parallel Multiply-Subtract Word + * PMULTH rd, rs, rt Parallel Multiply Halfword + * PMADDH rd, rs, rt Parallel Multiply-Add Halfword + * PMSUBH rd, rs, rt Parallel Multiply-Subtract Halfword + * PHMADH rd, rs, rt Parallel Horizontal Multiply-Add Halfword + * PHMSBH rd, rs, rt Parallel Horizontal Multiply-Subtract Halfword + * PDIVBW rs, rt Parallel Divide Broadcast Word + * PMFHI rd Parallel Move From HI Register + * PMFLO rd Parallel Move From LO Register + * PMTHI rs Parallel Move To HI Register + * PMTLO rs Parallel Move To LO Register + * PMFHL rd Parallel Move From HI/LO Register + * PMTHL rs Parallel Move To HI/LO Register + * + * Pack/Extend (11 instructions) + * ----------------------------- + * PPAC5 rd, rt Parallel Pack to 5 bits + * PPACB rd, rs, rt Parallel Pack to Byte + * PPACH rd, rs, rt Parallel Pack to Halfword + * PPACW rd, rs, rt Parallel Pack to Word + * PEXT5 rd, rt Parallel Extend Upper from 5 bits + * PEXTUB rd, rs, rt Parallel Extend Upper from Byte + * PEXTLB rd, rs, rt Parallel Extend Lower from Byte + * PEXTUH rd, rs, rt Parallel Extend Upper from Halfword + * PEXTLH rd, rs, rt Parallel Extend Lower from Halfword + * PEXTUW rd, rs, rt Parallel Extend Upper from Word + * PEXTLW rd, rs, rt Parallel Extend Lower from Word + * + * Others (16 instructions) + * ------------------------ + * PCPYH rd, rt Parallel Copy Halfword + * PCPYLD rd, rs, rt Parallel Copy Lower Doubleword + * PCPYUD rd, rs, rt 
Parallel Copy Upper Doubleword + * PREVH rd, rt Parallel Reverse Halfword + * PINTH rd, rs, rt Parallel Interleave Halfword + * PINTEH rd, rs, rt Parallel Interleave Even Halfword + * PEXEH rd, rt Parallel Exchange Even Halfword + * PEXCH rd, rt Parallel Exchange Center Halfword + * PEXEW rd, rt Parallel Exchange Even Word + * PEXCW rd, rt Parallel Exchange Center Word + * QFSRV rd, rs, rt Quadword Funnel Shift Right Variable + * MFSA rd Move from Shift Amount Register + * MTSA rs Move to Shift Amount Register + * MTSAB rs, immediate Move Byte Count to Shift Amount Register + * MTSAH rs, immediate Move Halfword Count to Shift Amount Register + * PROT3W rd, rt Parallel Rotate 3 Words + * + * The TX79-specific Multimedia Instruction encodings + * ================================================== + * + * TX79 Multimedia Instruction encoding table keys: + * + * * This code is reserved for future use. An attempt to execute it + * causes a Reserved Instruction exception. + * % This code indicates an instruction class. The instruction word + * must be further decoded by examining additional tables that show + * the values for other instruction fields. + * # This code is reserved for the unsupported instructions DMULT, + * DMULTU, DDIV, DDIVU, LL, LLD, SC, SCD, LWC2 and SWC2. An attempt + * to execute it causes a Reserved Instruction exception. + * + * TX79 Multimedia Instructions encoded by opcode field (MMI, LQ, SQ): + * + * 31 26 0 + * +--------+----------------------------------------+ + * | opcode | | + * +--------+----------------------------------------+ + * + * opcode bits 28..26 + * bits | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 + * 31..29 | 000 | 001 | 010 | 011 | 100 | 101 | 110 | 111 + * -------+-------+-------+-------+-------+-------+-------+-------+------- + * 0 000 |SPECIAL| REGIMM| J | JAL | BEQ | BNE | BLEZ | BGTZ + * 1 001 | ADDI | ADDIU | SLTI | SLTIU | ANDI | ORI | XORI | LUI + * 2 010 | COP0 | COP1 | * | * | BEQL | BNEL | BLEZL | BGTZL + * 3 011 | DADDI | DADDIU| LDL | LDR | MMI% | * | LQ | SQ + * 4 100 | LB | LH | LWL | LW | LBU | LHU | LWR | LWU + * 5 101 | SB | SH | SWL | SW | SDL | SDR | SWR | CACHE + * 6 110 | # | LWC1 | # | PREF | # | LDC1 | # | LD + * 7 111 | # | SWC1 | # | * | # | SDC1 | # | SD + */ + +enum { + TX79_CLASS_MMI = 0x1C << 26, /* Same as OPC_SPECIAL2 */ + TX79_LQ = 0x1E << 26, /* Same as OPC_MSA */ + TX79_SQ = 0x1F << 26, /* Same as OPC_SPECIAL3 */ +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI: + * + * 31 26 5 0 + * +--------+-------------------------------+--------+ + * | MMI | |function| + * +--------+-------------------------------+--------+ + * + * function bits 2..0 + * bits | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 + * 5..3 | 000 | 001 | 010 | 011 | 100 | 101 | 110 | 111 + * -------+-------+-------+-------+-------+-------+-------+-------+------- + * 0 000 | MADD | MADDU | * | * | PLZCW | * | * | * + * 1 001 | MMI0% | MMI2% | * | * | * | * | * | * + * 2 010 | MFHI1 | MTHI1 | MFLO1 | MTLO1 | * | * | * | * + * 3 011 | MULT1 | MULTU1| DIV1 | DIVU1 | * | * | * | * + * 4 100 | MADD1 | MADDU1| * | * | * | * | * | * + * 5 101 | MMI1% | MMI3% | * | * | * | * | * | * + * 6 110 | PMFHL | PMTHL | * | * | PSLLH | * | PSRLH | PSRAH + * 7 111 | * | * | * | * | PSLLW | * | PSRLW | PSRAW + */ + +#define MASK_TX79_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F)) +enum { + TX79_MMI_MADD = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */ + TX79_MMI_MADDU = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */ + TX79_MMI_PLZCW = 0x04 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI0 = 0x08 | 
TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI2 = 0x09 | TX79_CLASS_MMI, + TX79_MMI_MFHI1 = 0x10 | TX79_CLASS_MMI, /* Same minor as OPC_MFHI */ + TX79_MMI_MTHI1 = 0x11 | TX79_CLASS_MMI, /* Same minor as OPC_MTHI */ + TX79_MMI_MFLO1 = 0x12 | TX79_CLASS_MMI, /* Same minor as OPC_MFLO */ + TX79_MMI_MTLO1 = 0x13 | TX79_CLASS_MMI, /* Same minor as OPC_MTLO */ + TX79_MMI_MULT1 = 0x18 | TX79_CLASS_MMI, /* Same minor as OPC_MULT */ + TX79_MMI_MULTU1 = 0x19 | TX79_CLASS_MMI, /* Same minor as OPC_MULTU */ + TX79_MMI_DIV1 = 0x1A | TX79_CLASS_MMI, /* Same minor as OPC_DIV */ + TX79_MMI_DIVU1 = 0x1B | TX79_CLASS_MMI, /* Same minor as OPC_DIVU */ + TX79_MMI_MADD1 = 0x20 | TX79_CLASS_MMI, + TX79_MMI_MADDU1 = 0x21 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI1 = 0x28 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI3 = 0x29 | TX79_CLASS_MMI, + TX79_MMI_PMFHL = 0x30 | TX79_CLASS_MMI, + TX79_MMI_PMTHL = 0x31 | TX79_CLASS_MMI, + TX79_MMI_PSLLH = 0x34 | TX79_CLASS_MMI, + TX79_MMI_PSRLH = 0x36 | TX79_CLASS_MMI, + TX79_MMI_PSRAH = 0x37 | TX79_CLASS_MMI, + TX79_MMI_PSLLW = 0x3C | TX79_CLASS_MMI, + TX79_MMI_PSRLW = 0x3E | TX79_CLASS_MMI, + TX79_MMI_PSRAW = 0x3F | TX79_CLASS_MMI, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI0: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI0 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | PADDW | PSUBW | PCGTW | PMAXW + * 1 001 | PADDH | PSUBH | PCGTH | PMAXH + * 2 010 | PADDB | PSUBB | PCGTB | * + * 3 011 | * | * | * | * + * 4 100 | PADDSW| PSUBSW| PEXTLW| PPACW + * 5 101 | PADDSH| PSUBSH| PEXTLH| PPACH + * 6 110 | PADDSB| PSUBSB| PEXTLB| PPACB + * 7 111 | * | * | PEXT5 | PPAC5 + */ + +#define MASK_TX79_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI0_PADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBW = (0x01 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTW = (0x02 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PMAXW = (0x03 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDH = (0x04 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBH = (0x05 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTH = (0x06 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PMAXH = (0x07 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDB = (0x08 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBB = (0x09 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTB = (0x0A << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSW = (0x10 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSW = (0x11 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLW = (0x12 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACW = (0x13 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSH = (0x14 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSH = (0x15 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLH = (0x16 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACH = (0x17 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSB = (0x18 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSB = (0x19 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLB = (0x1A << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACB = (0x1B << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXT5 = (0x1E << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPAC5 = (0x1F << 6) | TX79_MMI_CLASS_MMI0, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI1: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI1 | + * +--------+----------------------+--------+--------+ + 
* + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | * | PABSW | PCEQW | PMINW + * 1 001 | PADSBH| PABSH | PCEQH | PMINH + * 2 010 | * | * | PCEQB | * + * 3 011 | * | * | * | * + * 4 100 | PADDUW| PSUBUW| PEXTUW| * + * 5 101 | PADDUH| PSUBUH| PEXTUH| * + * 6 110 | PADDUB| PSUBUB| PEXTUB| QFSRV + * 7 111 | * | * | * | * + */ + +#define MASK_TX79_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI1_PABSW = (0x01 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQW = (0x02 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PMINW = (0x03 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADSBH = (0x04 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PABSH = (0x05 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQH = (0x06 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PMINH = (0x07 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQB = (0x0A << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUW = (0x10 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PSUBUW = (0x11 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUW = (0x12 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUH = (0x14 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PSUBUH = (0x15 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUH = (0x16 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUB = (0x18 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PSUBUB = (0x19 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUB = (0x1A << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_QFSRV = (0x1B << 6) | TX79_MMI_CLASS_MMI1, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI2: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI2 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | PMADDW| * | PSLLVW| PSRLVW + * 1 001 | PMSUBW| * | * | * + * 2 010 | PMFHI | PMFLO | PINTH | * + * 3 011 | PMULTW| PDIVW | PCPYLD| * + * 4 100 | PMADDH| PHMADH| PAND | PXOR + * 5 101 | PMSUBH| PHMSBH| * | * + * 6 110 | * | * | PEXEH | PREVH + * 7 111 | PMULTH| PDIVBW| PEXEW | PROT3W + */ + +#define MASK_TX79_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI2_PMADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PSLLVW = (0x02 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PSRLVW = (0x03 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMSUBW = (0x04 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMFHI = (0x08 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMFLO = (0x09 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PINTH = (0x0A << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMULTW = (0x0C << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PDIVW = (0x0D << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PCPYLD = (0x0E << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMADDH = (0x10 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PHMADH = (0x11 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PAND = (0x12 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PXOR = (0x13 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMSUBH = (0x14 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PHMSBH = (0x15 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PEXEH = (0x1A << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PREVH = (0x1B << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMULTH = (0x1C << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PDIVBW = (0x1D << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PEXEW = (0x1E << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PROT3W = (0x1F << 6) | TX79_MMI_CLASS_MMI2, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI3: + * 
+ * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI3 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 |PMADDUW| * | * | PSRAVW + * 1 001 | * | * | * | * + * 2 010 | PMTHI | PMTLO | PINTEH| * + * 3 011 |PMULTUW| PDIVUW| PCPYUD| * + * 4 100 | * | * | POR | PNOR + * 5 101 | * | * | * | * + * 6 110 | * | * | PEXCH | PCPYH + * 7 111 | * | * | PEXCW | * + */ + +#define MASK_TX79_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI3_PMADDUW = (0x00 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PSRAVW = (0x03 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMTHI = (0x08 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMTLO = (0x09 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PINTEH = (0x0A << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMULTUW = (0x0C << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PDIVUW = (0x0D << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PCPYUD = (0x0E << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_POR = (0x12 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PNOR = (0x13 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PEXCH = (0x1A << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PCPYH = (0x1B << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PEXCW = (0x1E << 6) | TX79_MMI_CLASS_MMI3, +}; + /* global register indices */ static TCGv cpu_gpr[32], cpu_PC; static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC]; @@ -1398,6 +2421,10 @@ static TCGv_i32 fpu_fcr0, fpu_fcr31; static TCGv_i64 fpu_f64[32]; static TCGv_i64 msa_wr_d[64]; +/* MXU registers */ +static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1]; +static TCGv mxu_CR; + #include "exec/gen-icount.h" #define gen_helper_0e0i(name, arg) do { \ @@ -1447,8 +2474,9 @@ typedef struct DisasContext { target_ulong saved_pc; target_ulong page_start; uint32_t opcode; - int insn_flags; + uint64_t insn_flags; int32_t CP0_Config1; + int32_t CP0_Config2; int32_t CP0_Config3; int32_t CP0_Config5; /* Routine used to access memory */ @@ -1519,6 +2547,11 @@ static const char * const msaregnames[] = { "w30.d0", "w30.d1", "w31.d0", "w31.d1", }; +static const char * const mxuregnames[] = { + "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8", + "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "MXU_CR", +}; + #define LOG_DISAS(...) \ do { \ if (MIPS_DEBUG_DISAS) { \ @@ -1600,6 +2633,36 @@ static inline void gen_store_srsgpr (int from, int to) } } +/* MXU General purpose registers moves. */ +static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg) +{ + if (reg == 0) { + tcg_gen_movi_tl(t, 0); + } else if (reg <= 15) { + tcg_gen_mov_tl(t, mxu_gpr[reg - 1]); + } +} + +static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg) +{ + if (reg > 0 && reg <= 15) { + tcg_gen_mov_tl(mxu_gpr[reg - 1], t); + } +} + +/* MXU control register moves. */ +static inline void gen_load_mxu_cr(TCGv t) +{ + tcg_gen_mov_tl(t, mxu_CR); +} + +static inline void gen_store_mxu_cr(TCGv t) +{ + /* TODO: Add handling of RW rules for MXU_CR. 
*/ + tcg_gen_mov_tl(mxu_CR, t); +} + + /* Tests */ static inline void gen_save_pc(target_ulong pc) { @@ -1857,9 +2920,20 @@ static inline void check_dsp(DisasContext *ctx) } } -static inline void check_dspr2(DisasContext *ctx) +static inline void check_dsp_r2(DisasContext *ctx) +{ + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R2))) { + if (ctx->insn_flags & ASE_DSP) { + generate_exception_end(ctx, EXCP_DSPDIS); + } else { + generate_exception_end(ctx, EXCP_RI); + } + } +} + +static inline void check_dsp_r3(DisasContext *ctx) { - if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) { + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R3))) { if (ctx->insn_flags & ASE_DSP) { generate_exception_end(ctx, EXCP_DSPDIS); } else { @@ -1870,7 +2944,7 @@ static inline void check_dspr2(DisasContext *ctx) /* This code generates a "reserved instruction" exception if the CPU does not support the instruction set corresponding to flags. */ -static inline void check_insn(DisasContext *ctx, int flags) +static inline void check_insn(DisasContext *ctx, uint64_t flags) { if (unlikely(!(ctx->insn_flags & flags))) { generate_exception_end(ctx, EXCP_RI); @@ -1880,13 +2954,28 @@ static inline void check_insn(DisasContext *ctx, int flags) /* This code generates a "reserved instruction" exception if the CPU has corresponding flag set which indicates that the instruction has been removed. */ -static inline void check_insn_opc_removed(DisasContext *ctx, int flags) +static inline void check_insn_opc_removed(DisasContext *ctx, uint64_t flags) { if (unlikely(ctx->insn_flags & flags)) { generate_exception_end(ctx, EXCP_RI); } } +/* + * The Linux kernel traps certain reserved instruction exceptions to + * emulate the corresponding instructions. QEMU is the kernel in user + * mode, so those traps are emulated by accepting the instructions. + * + * A reserved instruction exception is generated for flagged CPUs if + * QEMU runs in system mode. + */ +static inline void check_insn_opc_user_only(DisasContext *ctx, uint64_t flags) +{ +#ifndef CONFIG_USER_ONLY + check_insn_opc_removed(ctx, flags); +#endif +} + /* This code generates a "reserved instruction" exception if the CPU does not support 64-bit paired-single (PS) floating point data type */ static inline void check_ps(DisasContext *ctx) @@ -1927,6 +3016,19 @@ static inline void check_xnp(DisasContext *ctx) } } +#ifndef CONFIG_USER_ONLY +/* + * This code generates a "reserved instruction" exception if the + * Config3 PW bit is NOT set. + */ +static inline void check_pw(DisasContext *ctx) +{ + if (unlikely(!(ctx->CP0_Config3 & (1 << CP0C3_PW)))) { + generate_exception_end(ctx, EXCP_RI); + } +} +#endif + /* * This code generates a "reserved instruction" exception if the * Config3 MT bit is NOT set. @@ -1968,6 +3070,35 @@ static inline void check_nms(DisasContext *ctx) } } +/* + * This code generates a "reserved instruction" exception if the + * Config5 NMS bit is set, and Config1 DL, Config1 IL, Config2 SL, + * Config2 TL, and Config5 L2C are unset. + */ +static inline void check_nms_dl_il_sl_tl_l2c(DisasContext *ctx) +{ + if (unlikely(ctx->CP0_Config5 & (1 << CP0C5_NMS)) && + !(ctx->CP0_Config1 & (1 << CP0C1_DL)) && + !(ctx->CP0_Config1 & (1 << CP0C1_IL)) && + !(ctx->CP0_Config2 & (1 << CP0C2_SL)) && + !(ctx->CP0_Config2 & (1 << CP0C2_TL)) && + !(ctx->CP0_Config5 & (1 << CP0C5_L2C))) + { + generate_exception_end(ctx, EXCP_RI); + } +} + +/* + * This code generates a "reserved instruction" exception if the + * Config5 EVA bit is NOT set. 
+ */ +static inline void check_eva(DisasContext *ctx) +{ + if (unlikely(!(ctx->CP0_Config5 & (1 << CP0C5_EVA)))) { + generate_exception_end(ctx, EXCP_RI); + } +} + /* Define small wrappers for gen_load_fpr* so that we have a uniform calling interface for 32 and 64-bit FPRs. No sense in changing @@ -3231,17 +4362,21 @@ static void gen_shift(DisasContext *ctx, uint32_t opc, /* Arithmetic on HI/LO registers */ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) { - if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) { + if (reg == 0 && (opc == OPC_MFHI || opc == TX79_MMI_MFHI1 || + opc == OPC_MFLO || opc == TX79_MMI_MFLO1)) { /* Treat as NOP. */ return; } if (acc != 0) { - check_dsp(ctx); + if (!(ctx->insn_flags & INSN_R5900)) { + check_dsp(ctx); + } } switch (opc) { case OPC_MFHI: + case TX79_MMI_MFHI1: #if defined(TARGET_MIPS64) if (acc != 0) { tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]); @@ -3252,6 +4387,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MFLO: + case TX79_MMI_MFLO1: #if defined(TARGET_MIPS64) if (acc != 0) { tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]); @@ -3262,6 +4398,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MTHI: + case TX79_MMI_MTHI1: if (reg != 0) { #if defined(TARGET_MIPS64) if (acc != 0) { @@ -3276,6 +4413,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MTLO: + case TX79_MMI_MTLO1: if (reg != 0) { #if defined(TARGET_MIPS64) if (acc != 0) { @@ -3588,11 +4726,14 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, gen_load_gpr(t1, rt); if (acc != 0) { - check_dsp(ctx); + if (!(ctx->insn_flags & INSN_R5900)) { + check_dsp(ctx); + } } switch (opc) { case OPC_DIV: + case TX79_MMI_DIV1: { TCGv t2 = tcg_temp_new(); TCGv t3 = tcg_temp_new(); @@ -3614,6 +4755,7 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, } break; case OPC_DIVU: + case TX79_MMI_DIVU1: { TCGv t2 = tcg_const_tl(0); TCGv t3 = tcg_const_tl(1); @@ -3768,6 +4910,84 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, tcg_temp_free(t1); } +/* + * These MULT and MULTU instructions implemented in for example the + * Toshiba/Sony R5900 and the Toshiba TX19, TX39 and TX79 core + * architectures are special three-operand variants with the syntax + * + * MULT[U][1] rd, rs, rt + * + * such that + * + * (rd, LO, HI) <- rs * rt + * + * where the low-order 32-bits of the result is placed into both the + * GPR rd and the special register LO. The high-order 32-bits of the + * result is placed into the special register HI. + * + * If the GPR rd is omitted in assembly language, it is taken to be 0, + * which is the zero register that always reads as 0. 
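+ *
+ * For example, assuming rd = $3, rs = $4 = 0x00012345 and
+ * rt = $5 = 0x00010000 (register numbers chosen arbitrarily),
+ *
+ *     MULT1 $3, $4, $5
+ *
+ * yields the 64-bit product 0x0000000123450000, so that
+ * $3 = LO1 = 0x23450000 and HI1 = 0x00000001.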
+ */ +static void gen_mul_txx9(DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + int acc = 0; + + gen_load_gpr(t0, rs); + gen_load_gpr(t1, rt); + + switch (opc) { + case TX79_MMI_MULT1: + acc = 1; + /* Fall through */ + case OPC_MULT: + { + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t2, t0); + tcg_gen_trunc_tl_i32(t3, t1); + tcg_gen_muls2_i32(t2, t3, t2, t3); + if (rd) { + tcg_gen_ext_i32_tl(cpu_gpr[rd], t2); + } + tcg_gen_ext_i32_tl(cpu_LO[acc], t2); + tcg_gen_ext_i32_tl(cpu_HI[acc], t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } + break; + case TX79_MMI_MULTU1: + acc = 1; + /* Fall through */ + case OPC_MULTU: + { + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t2, t0); + tcg_gen_trunc_tl_i32(t3, t1); + tcg_gen_mulu2_i32(t2, t3, t2, t3); + if (rd) { + tcg_gen_ext_i32_tl(cpu_gpr[rd], t2); + } + tcg_gen_ext_i32_tl(cpu_LO[acc], t2); + tcg_gen_ext_i32_tl(cpu_HI[acc], t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } + break; + default: + MIPS_INVAL("mul TXx9"); + generate_exception_end(ctx, EXCP_RI); + goto out; + } + + out: + tcg_temp_free(t0); + tcg_temp_free(t1); +} + static void gen_mul_vr54xx (DisasContext *ctx, uint32_t opc, int rd, int rs, int rt) { @@ -5537,6 +6757,21 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ext32s_tl(arg, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -5572,6 +6807,11 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6238,6 +7478,21 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -6273,6 +7528,11 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6948,6 +8208,21 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_SegCtl2)); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ 
-6983,6 +8258,11 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -7631,6 +8911,21 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -7666,6 +8961,11 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -14999,15 +16299,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) case 0x38: /* cmovs */ switch ((ctx->opcode >> 6) & 0x7) { - case MOVN_FMT: /* SELNEZ_FMT */ + case MOVN_FMT: /* SELEQZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELNEZ_FMT */ + /* SELEQZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -15021,15 +16321,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) check_insn_opc_removed(ctx, ISA_MIPS32R6); FINSN_3ARG_SDPS(MOVN); break; - case MOVZ_FMT: /* SELEQZ_FMT */ + case MOVZ_FMT: /* SELNEZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELEQZ_FMT */ + /* SELNEZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -16285,6 +17585,16 @@ enum { NM_SOV = 0x7a, }; +/* CRC32 instruction pool */ +enum { + NM_CRC32B = 0x00, + NM_CRC32H = 0x01, + NM_CRC32W = 0x02, + NM_CRC32CB = 0x04, + NM_CRC32CH = 0x05, + NM_CRC32CW = 0x06, +}; + /* POOL32A5 instruction pool */ enum { NM_CMP_EQ_PH = 0x00, @@ -16488,6 +17798,40 @@ enum { NM_P_SC = 0x0b, }; +/* P.LS.E0 instruction pool */ +enum { + NM_LBE = 0x00, + NM_SBE = 0x01, + NM_LBUE = 0x02, + NM_P_PREFE = 0x03, + NM_LHE = 0x04, + NM_SHE = 0x05, + NM_LHUE = 0x06, + NM_CACHEE = 0x07, + NM_LWE = 0x08, + NM_SWE = 0x09, + NM_P_LLE = 0x0a, + NM_P_SCE = 0x0b, +}; + +/* P.PREFE instruction pool */ +enum { + NM_SYNCIE = 0x00, + NM_PREFE = 0x01, +}; + +/* P.LLE instruction pool */ +enum { + NM_LLE = 0x00, + NM_LLWPE = 0x01, +}; + +/* P.SCE instruction pool */ +enum { + NM_SCE = 0x00, + NM_SCWPE = 0x01, +}; + /* P.LS.WM instruction pool */ enum { NM_LWM = 0x00, @@ -17444,7 +18788,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, case NM_POOL32AXF_2_0_7: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPAQ_S_W_PH: @@ -17452,7 +18796,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, 
uint32_t opc, gen_helper_dpaq_s_w_ph(t0, v1, v0, cpu_env); break; case NM_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSQ_S_W_PH: @@ -17467,7 +18811,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, case NM_POOL32AXF_2_8_15: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v0, v1, cpu_env); break; case NM_DPAQ_SA_L_W: @@ -17475,7 +18819,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpaq_sa_l_w(t0, v0, v1, cpu_env); break; case NM_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSQ_SA_L_W: @@ -17494,7 +18838,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSU_H_QBL: @@ -17502,11 +18846,11 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v0, v1, cpu_env); break; default: @@ -17521,7 +18865,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSU_H_QBR: @@ -17529,7 +18873,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_MULSAQ_S_W_PH: @@ -17571,7 +18915,7 @@ static void gen_pool32axf_2_nanomips_insn(DisasContext *ctx, uint32_t opc, gen_pool32axf_2_multiply(ctx, opc, v0_t, v1_t, rd); break; case NM_BALIGN: - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt != 0) { gen_load_gpr(t0, rs); rd &= 3; @@ -17801,7 +19145,7 @@ static void gen_pool32axf_4_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(v0_t, v0_t, cpu_env); gen_store_gpr(v0_t, ret); break; @@ -17940,7 +19284,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 2); switch (extract32(ctx->opcode, 12, 1)) { case 0: @@ -17956,7 +19300,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, } break; case NM_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 1); gen_helper_shrl_ph(t0, t0, rs_t); gen_store_gpr(t0, rt); @@ -18881,19 +20225,19 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case 
NM_CMPGDU_LE_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); @@ -18949,7 +20293,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_PH */ @@ -18964,7 +20308,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_W */ @@ -18994,7 +20338,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDU_PH */ @@ -19009,7 +20353,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDUH_QB */ @@ -19039,7 +20383,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SHRAV_QB */ @@ -19069,7 +20413,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_PH */ @@ -19084,7 +20428,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_W */ @@ -19114,7 +20458,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBU_PH */ @@ -19129,7 +20473,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBUH_QB */ @@ -19159,7 +20503,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* PRECR_SRA_PH_W */ @@ -19199,22 +20543,22 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_RS_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_rs_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_APPEND: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_load_gpr(t0, rs); if (rd != 0) { tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], rd, 32 - rd); @@ -19232,7 +20576,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(v1_t, v1_t, v2_t); gen_store_gpr(v1_t, ret); break; @@ -19274,7 +20618,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MUL_S_PH: - 
check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* MUL_PH */ @@ -19289,7 +20633,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(v1_t, v1_t, v2_t); gen_store_gpr(v1_t, ret); break; @@ -19326,8 +20670,8 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, case 0: /* SHRA_PH */ gen_helper_shra_ph(v1_t, t0, v1_t); - break; gen_store_gpr(v1_t, rt); + break; case 1: /* SHRA_R_PH */ gen_helper_shra_r_ph(v1_t, t0, v1_t); @@ -19984,6 +21328,107 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) break; } break; + case NM_P_LS_E0: + switch (extract32(ctx->opcode, 11, 4)) { + case NM_LBE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LBE, rt, rs, s); + break; + case NM_SBE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_st(ctx, OPC_SBE, rt, rs, s); + break; + case NM_LBUE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LBUE, rt, rs, s); + break; + case NM_P_PREFE: + if (rt == 31) { + /* case NM_SYNCIE */ + check_eva(ctx); + check_cp0_enabled(ctx); + /* Break the TB to be able to sync copied instructions + immediately */ + ctx->base.is_jmp = DISAS_STOP; + } else { + /* case NM_PREFE */ + check_eva(ctx); + check_cp0_enabled(ctx); + /* Treat as NOP. */ + } + break; + case NM_LHE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LHE, rt, rs, s); + break; + case NM_SHE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_st(ctx, OPC_SHE, rt, rs, s); + break; + case NM_LHUE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LHUE, rt, rs, s); + break; + case NM_CACHEE: + check_nms_dl_il_sl_tl_l2c(ctx); + gen_cache_operation(ctx, rt, rs, s); + break; + case NM_LWE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LWE, rt, rs, s); + break; + case NM_SWE: + check_eva(ctx); + check_cp0_enabled(ctx); + gen_st(ctx, OPC_SWE, rt, rs, s); + break; + case NM_P_LLE: + switch (extract32(ctx->opcode, 2, 2)) { + case NM_LLE: + check_xnp(ctx); + check_eva(ctx); + check_cp0_enabled(ctx); + gen_ld(ctx, OPC_LLE, rt, rs, s); + break; + case NM_LLWPE: + check_xnp(ctx); + check_eva(ctx); + check_cp0_enabled(ctx); + gen_llwp(ctx, rs, 0, rt, extract32(ctx->opcode, 3, 5)); + break; + default: + generate_exception_end(ctx, EXCP_RI); + break; + } + break; + case NM_P_SCE: + switch (extract32(ctx->opcode, 2, 2)) { + case NM_SCE: + check_xnp(ctx); + check_eva(ctx); + check_cp0_enabled(ctx); + gen_st_cond(ctx, OPC_SCE, rt, rs, s); + break; + case NM_SCWPE: + check_xnp(ctx); + check_eva(ctx); + check_cp0_enabled(ctx); + gen_scwp(ctx, rs, 0, rt, extract32(ctx->opcode, 3, 5)); + break; + default: + generate_exception_end(ctx, EXCP_RI); + break; + } + break; + } + break; case NM_P_LS_WM: case NM_P_LS_UAWM: check_nms(ctx); @@ -20098,7 +21543,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) gen_compute_branch_cp1_nm(ctx, OPC_BC1NEZ, rt, s); break; case NM_BPOSGE32C: - check_dspr2(ctx); + check_dsp_r3(ctx); { int32_t imm = extract32(ctx->opcode, 1, 13) | extract32(ctx->opcode, 0, 1) << 13; @@ -20607,7 +22052,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, switch (op1) { /* OPC_MULT_G_2E is equal OPC_ADDUH_QB_DSP */ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_ADDUH_QB: gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t); @@ -20650,7 +22095,7 @@ static void 
gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_ABSQ_S_PH_DSP: switch (op2) { case OPC_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PH: @@ -20729,11 +22174,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBQ_PH: @@ -20757,11 +22202,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDSC: @@ -20785,7 +22230,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_QB_DSP: switch (op2) { case OPC_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECRQ_QB_PH: @@ -20793,7 +22238,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20802,7 +22247,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20884,7 +22329,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t); break; case OPC_ABSQ_S_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PW: @@ -20928,19 +22373,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SUBUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDQ_PW: @@ -20968,19 +22413,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_r_ob(cpu_gpr[ret], 
v1_t, v2_t); break; } @@ -20988,11 +22433,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_OB_DSP: switch (op2) { case OPC_PRECR_OB_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 ret_t = tcg_const_i32(ret); gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t); @@ -21000,7 +22445,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_v = tcg_const_i32(ret); gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v); @@ -21103,27 +22548,27 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRAV_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_PH: @@ -21202,19 +22647,19 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env); break; case OPC_SHRA_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_PW: @@ -21258,11 +22703,11 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRL_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRLV_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t); break; default: /* Invalid */ @@ -21303,7 +22748,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. 
*/ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_MUL_PH: gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); @@ -21338,11 +22783,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env); break; case OPC_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQ_S_W_PH: @@ -21350,19 +22795,19 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQ_S_W_PH: @@ -21370,11 +22815,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSAQ_S_W_PH: @@ -21406,7 +22851,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env); break; } @@ -21435,7 +22880,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env); break; case OPC_DPA_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPAQ_S_W_QH: @@ -21455,7 +22900,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env); break; case OPC_DPS_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPSQ_S_W_QH: @@ -21549,7 +22994,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; } @@ -21773,7 +23218,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmpgu_le_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21781,7 +23226,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21789,7 +23234,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, 
tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LE_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21850,15 +23295,15 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmp_le_qh(v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_EQ_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LT_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LE_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_le_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGU_EQ_OB: @@ -21916,7 +23361,7 @@ static void gen_mipsdsp_append(CPUMIPSState *env, DisasContext *ctx, { TCGv t0; - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt == 0) { /* Treat as NOP. */ @@ -22351,7 +23796,7 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MOVN: /* Conditional move */ case OPC_MOVZ: check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 | - INSN_LOONGSON2E | INSN_LOONGSON2F); + INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900); gen_cond_move(ctx, op1, rd, rs, rt); break; case OPC_MFHI: /* Move from HI/LO */ @@ -22378,6 +23823,8 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) check_insn(ctx, INSN_VR54XX); op1 = MASK_MUL_VR54XX(ctx->opcode); gen_mul_vr54xx(ctx, op1, rd, rs, rt); + } else if (ctx->insn_flags & INSN_R5900) { + gen_mul_txx9(ctx, op1, rd, rs, rt); } else { gen_muldiv(ctx, op1, rd & 3, rs, rt); } @@ -22392,6 +23839,7 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_DDIV: case OPC_DDIVU: check_insn(ctx, ISA_MIPS3); + check_insn_opc_user_only(ctx, INSN_R5900); check_mips_64(ctx); gen_muldiv(ctx, op1, 0, rs, rt); break; @@ -22624,6 +24072,1578 @@ static void decode_opc_special(CPUMIPSState *env, DisasContext *ctx) } } + +/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */ +#define MXU_APTN1_A 0 +#define MXU_APTN1_S 1 + +/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */ +#define MXU_APTN2_AA 0 +#define MXU_APTN2_AS 1 +#define MXU_APTN2_SA 2 +#define MXU_APTN2_SS 3 + +/* MXU execute add/subtract 2-bit pattern 'eptn2' */ +#define MXU_EPTN2_AA 0 +#define MXU_EPTN2_AS 1 +#define MXU_EPTN2_SA 2 +#define MXU_EPTN2_SS 3 + +/* MXU operand getting pattern 'optn2' */ +#define MXU_OPTN2_WW 0 +#define MXU_OPTN2_LW 1 +#define MXU_OPTN2_HW 2 +#define MXU_OPTN2_XW 3 + +/* MXU operand getting pattern 'optn3' */ +#define MXU_OPTN3_PTN0 0 +#define MXU_OPTN3_PTN1 1 +#define MXU_OPTN3_PTN2 2 +#define MXU_OPTN3_PTN3 3 +#define MXU_OPTN3_PTN4 4 +#define MXU_OPTN3_PTN5 5 +#define MXU_OPTN3_PTN6 6 +#define MXU_OPTN3_PTN7 7 + + +/* + * S32I2M XRa, rb - Register move from GRF to XRF + */ +static void gen_mxu_s32i2m(DisasContext *ctx) +{ + TCGv t0; + uint32_t XRa, Rb; + + t0 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 5); + Rb = extract32(ctx->opcode, 16, 5); + + gen_load_gpr(t0, Rb); + if (XRa <= 15) { + gen_store_mxu_gpr(t0, XRa); + } else if (XRa == 16) { + gen_store_mxu_cr(t0); + } + + tcg_temp_free(t0); +} + +/* + * S32M2I XRa, rb - Register move from XRF to GRF + */ +static void gen_mxu_s32m2i(DisasContext *ctx) +{ + TCGv t0; + uint32_t XRa, Rb; + + t0 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 5); + Rb = extract32(ctx->opcode, 16, 5); + + if (XRa <= 15) { + gen_load_mxu_gpr(t0, XRa); + } else if 
(XRa == 16) { + gen_load_mxu_cr(t0); + } + + gen_store_gpr(t0, Rb); + + tcg_temp_free(t0); +} + +/* + * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF + */ +static void gen_mxu_s8ldd(DisasContext *ctx) +{ + TCGv t0, t1; + uint32_t XRa, Rb, s8, optn3; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s8 = extract32(ctx->opcode, 10, 8); + optn3 = extract32(ctx->opcode, 18, 3); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + tcg_gen_addi_tl(t0, t0, (int8_t)s8); + + switch (optn3) { + /* XRa[7:0] = tmp8 */ + case MXU_OPTN3_PTN0: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 0, 8); + break; + /* XRa[15:8] = tmp8 */ + case MXU_OPTN3_PTN1: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 8, 8); + break; + /* XRa[23:16] = tmp8 */ + case MXU_OPTN3_PTN2: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 16, 8); + break; + /* XRa[31:24] = tmp8 */ + case MXU_OPTN3_PTN3: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 24, 8); + break; + /* XRa = {8'b0, tmp8, 8'b0, tmp8} */ + case MXU_OPTN3_PTN4: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + tcg_gen_deposit_tl(t0, t1, t1, 16, 16); + break; + /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */ + case MXU_OPTN3_PTN5: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + tcg_gen_shli_tl(t1, t1, 8); + tcg_gen_deposit_tl(t0, t1, t1, 16, 16); + break; + /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */ + case MXU_OPTN3_PTN6: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB); + tcg_gen_mov_tl(t0, t1); + tcg_gen_andi_tl(t0, t0, 0xFF00FFFF); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(t0, t0, t1); + break; + /* XRa = {tmp8, tmp8, tmp8, tmp8} */ + case MXU_OPTN3_PTN7: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); + tcg_gen_deposit_tl(t1, t1, t1, 8, 8); + tcg_gen_deposit_tl(t0, t1, t1, 16, 16); + break; + } + + gen_store_mxu_gpr(t0, XRa); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +/* + * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication + */ +static void gen_mxu_d16mul(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3; + uint32_t XRa, XRb, XRc, XRd, optn2; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + optn2 = extract32(ctx->opcode, 22, 2); + + gen_load_mxu_gpr(t1, XRb); + tcg_gen_sextract_tl(t0, t1, 0, 16); + tcg_gen_sextract_tl(t1, t1, 16, 16); + gen_load_mxu_gpr(t3, XRc); + tcg_gen_sextract_tl(t2, t3, 0, 16); + tcg_gen_sextract_tl(t3, t3, 16, 16); + + switch (optn2) { + case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + } + gen_store_mxu_gpr(t3, XRa); + gen_store_mxu_gpr(t2, XRd); + + tcg_temp_free(t0); + tcg_temp_free(t1); 
+ tcg_temp_free(t2); + tcg_temp_free(t3); +} + +/* + * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply + * and accumulate + */ +static void gen_mxu_d16mac(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3; + uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + optn2 = extract32(ctx->opcode, 22, 2); + aptn2 = extract32(ctx->opcode, 24, 2); + + gen_load_mxu_gpr(t1, XRb); + tcg_gen_sextract_tl(t0, t1, 0, 16); + tcg_gen_sextract_tl(t1, t1, 16, 16); + + gen_load_mxu_gpr(t3, XRc); + tcg_gen_sextract_tl(t2, t3, 0, 16); + tcg_gen_sextract_tl(t3, t3, 16, 16); + + switch (optn2) { + case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + } + gen_load_mxu_gpr(t0, XRa); + gen_load_mxu_gpr(t1, XRd); + + switch (aptn2) { + case MXU_APTN2_AA: + tcg_gen_add_tl(t3, t0, t3); + tcg_gen_add_tl(t2, t1, t2); + break; + case MXU_APTN2_AS: + tcg_gen_add_tl(t3, t0, t3); + tcg_gen_sub_tl(t2, t1, t2); + break; + case MXU_APTN2_SA: + tcg_gen_sub_tl(t3, t0, t3); + tcg_gen_add_tl(t2, t1, t2); + break; + case MXU_APTN2_SS: + tcg_gen_sub_tl(t3, t0, t3); + tcg_gen_sub_tl(t2, t1, t2); + break; + } + gen_store_mxu_gpr(t3, XRa); + gen_store_mxu_gpr(t2, XRd); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(t3); +} + +/* + * Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply + * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply + */ +static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3, t4, t5, t6, t7; + uint32_t XRa, XRb, XRc, XRd, sel; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + t4 = tcg_temp_new(); + t5 = tcg_temp_new(); + t6 = tcg_temp_new(); + t7 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + sel = extract32(ctx->opcode, 22, 2); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t7, XRc); + + if (sel == 0x2) { + /* Q8MULSU */ + tcg_gen_ext8s_tl(t0, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8s_tl(t1, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8s_tl(t2, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8s_tl(t3, t3); + } else { + /* Q8MUL */ + tcg_gen_ext8u_tl(t0, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8u_tl(t1, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8u_tl(t2, t3); + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_ext8u_tl(t3, t3); + } + + tcg_gen_ext8u_tl(t4, t7); + tcg_gen_shri_tl(t7, t7, 8); + tcg_gen_ext8u_tl(t5, t7); + tcg_gen_shri_tl(t7, t7, 8); + tcg_gen_ext8u_tl(t6, t7); + tcg_gen_shri_tl(t7, t7, 8); + tcg_gen_ext8u_tl(t7, t7); + + tcg_gen_mul_tl(t0, t0, t4); + tcg_gen_mul_tl(t1, t1, t5); + tcg_gen_mul_tl(t2, t2, t6); + tcg_gen_mul_tl(t3, t3, t7); + + tcg_gen_andi_tl(t0, t0, 0xFFFF); + 
tcg_gen_andi_tl(t1, t1, 0xFFFF); + tcg_gen_andi_tl(t2, t2, 0xFFFF); + tcg_gen_andi_tl(t3, t3, 0xFFFF); + + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_shli_tl(t3, t3, 16); + + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_or_tl(t1, t2, t3); + + gen_store_mxu_gpr(t0, XRd); + gen_store_mxu_gpr(t1, XRa); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(t3); + tcg_temp_free(t4); + tcg_temp_free(t5); + tcg_temp_free(t6); + tcg_temp_free(t7); +} + +/* + * S32LDD XRa, Rb, S12 - Load a word from memory to XRF + * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq. + */ +static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx) +{ + TCGv t0, t1; + uint32_t XRa, Rb, s12, sel; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s12 = extract32(ctx->opcode, 10, 10); + sel = extract32(ctx->opcode, 20, 1); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + + tcg_gen_movi_tl(t1, s12); + tcg_gen_shli_tl(t1, t1, 2); + if (s12 & 0x200) { + tcg_gen_ori_tl(t1, t1, 0xFFFFF000); + } + tcg_gen_add_tl(t1, t0, t1); + tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL); + + if (sel == 1) { + /* S32LDDR */ + tcg_gen_bswap32_tl(t1, t1); + } + gen_store_mxu_gpr(t1, XRa); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + + +/* + * Decoding engine for MXU + * ======================= + */ + +/* + * + * Decode MXU pool00 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0 0 0 0|x x x| XRc | XRb | XRa |MXU__POOL00| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool00(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_S32MAX: + /* TODO: Implement emulation of S32MAX instruction. */ + MIPS_INVAL("OPC_MXU_S32MAX"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MIN: + /* TODO: Implement emulation of S32MIN instruction. */ + MIPS_INVAL("OPC_MXU_S32MIN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MAX: + /* TODO: Implement emulation of D16MAX instruction. */ + MIPS_INVAL("OPC_MXU_D16MAX"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MIN: + /* TODO: Implement emulation of D16MIN instruction. */ + MIPS_INVAL("OPC_MXU_D16MIN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8MAX: + /* TODO: Implement emulation of Q8MAX instruction. */ + MIPS_INVAL("OPC_MXU_Q8MAX"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8MIN: + /* TODO: Implement emulation of Q8MIN instruction. */ + MIPS_INVAL("OPC_MXU_Q8MIN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8SLT: + /* TODO: Implement emulation of Q8SLT instruction. */ + MIPS_INVAL("OPC_MXU_Q8SLT"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8SLTU: + /* TODO: Implement emulation of Q8SLTU instruction. 
*/ + MIPS_INVAL("OPC_MXU_Q8SLTU"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool01 + * + * S32SLT, D16SLT, D16AVG, D16AVGR, Q8AVG, Q8AVGR: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0 0 0 0|x x x| XRc | XRb | XRa |MXU__POOL01| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + * Q8ADD: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+-----+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |en2|0 0 0|x x x| XRc | XRb | XRa |MXU__POOL01| + * +-----------+---+-----+-----+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool01(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_S32SLT: + /* TODO: Implement emulation of S32SLT instruction. */ + MIPS_INVAL("OPC_MXU_S32SLT"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16SLT: + /* TODO: Implement emulation of D16SLT instruction. */ + MIPS_INVAL("OPC_MXU_D16SLT"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16AVG: + /* TODO: Implement emulation of D16AVG instruction. */ + MIPS_INVAL("OPC_MXU_D16AVG"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16AVGR: + /* TODO: Implement emulation of D16AVGR instruction. */ + MIPS_INVAL("OPC_MXU_D16AVGR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8AVG: + /* TODO: Implement emulation of Q8AVG instruction. */ + MIPS_INVAL("OPC_MXU_Q8AVG"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8AVGR: + /* TODO: Implement emulation of Q8AVGR instruction. */ + MIPS_INVAL("OPC_MXU_Q8AVGR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8ADD: + /* TODO: Implement emulation of Q8ADD instruction. */ + MIPS_INVAL("OPC_MXU_Q8ADD"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool02 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0 0 0 0|x x x| XRc | XRb | XRa |MXU__POOL02| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool02(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_S32CPS: + /* TODO: Implement emulation of S32CPS instruction. */ + MIPS_INVAL("OPC_MXU_S32CPS"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16CPS: + /* TODO: Implement emulation of D16CPS instruction. */ + MIPS_INVAL("OPC_MXU_D16CPS"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8ABD: + /* TODO: Implement emulation of Q8ABD instruction. */ + MIPS_INVAL("OPC_MXU_Q8ABD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SAT: + /* TODO: Implement emulation of Q16SAT instruction. 
*/ + MIPS_INVAL("OPC_MXU_Q16SAT"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool03 + * + * D16MULF: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |x x|on2|0 0 0 0| XRc | XRb | XRa |MXU__POOL03| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + * D16MULE: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |x x|on2| Xd | XRc | XRb | XRa |MXU__POOL03| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool03(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 24, 2); + + switch (opcode) { + case OPC_MXU_D16MULF: + /* TODO: Implement emulation of D16MULF instruction. */ + MIPS_INVAL("OPC_MXU_D16MULF"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MULE: + /* TODO: Implement emulation of D16MULE instruction. */ + MIPS_INVAL("OPC_MXU_D16MULE"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool04 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-+-------------------+-------+-----------+ + * | SPECIAL2 | rb |x| s12 | XRa |MXU__POOL04| + * +-----------+---------+-+-------------------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool04(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 20, 1); + + switch (opcode) { + case OPC_MXU_S32LDD: + case OPC_MXU_S32LDDR: + gen_mxu_s32ldd_s32lddr(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool05 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-+-------------------+-------+-----------+ + * | SPECIAL2 | rb |x| s12 | XRa |MXU__POOL05| + * +-----------+---------+-+-------------------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool05(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 20, 1); + + switch (opcode) { + case OPC_MXU_S32STD: + /* TODO: Implement emulation of S32STD instruction. */ + MIPS_INVAL("OPC_MXU_S32STD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32STDR: + /* TODO: Implement emulation of S32STDR instruction. */ + MIPS_INVAL("OPC_MXU_S32STDR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool06 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rb | rc |st2|x x x x| XRa |MXU__POOL06| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool06(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + switch (opcode) { + case OPC_MXU_S32LDDV: + /* TODO: Implement emulation of S32LDDV instruction. 
*/ + MIPS_INVAL("OPC_MXU_S32LDDV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32LDDVR: + /* TODO: Implement emulation of S32LDDVR instruction. */ + MIPS_INVAL("OPC_MXU_S32LDDVR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool07 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rb | rc |st2|x x x x| XRa |MXU__POOL07| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool07(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + switch (opcode) { + case OPC_MXU_S32STDV: + /* TODO: Implement emulation of S32TDV instruction. */ + MIPS_INVAL("OPC_MXU_S32TDV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32STDVR: + /* TODO: Implement emulation of S32TDVR instruction. */ + MIPS_INVAL("OPC_MXU_S32TDVR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool08 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-+-------------------+-------+-----------+ + * | SPECIAL2 | rb |x| s12 | XRa |MXU__POOL08| + * +-----------+---------+-+-------------------+-------+-----------+ + * +*/ +static void decode_opc_mxu__pool08(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 20, 1); + + switch (opcode) { + case OPC_MXU_S32LDI: + /* TODO: Implement emulation of S32LDI instruction. */ + MIPS_INVAL("OPC_MXU_S32LDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32LDIR: + /* TODO: Implement emulation of S32LDIR instruction. */ + MIPS_INVAL("OPC_MXU_S32LDIR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool09 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-+-------------------+-------+-----------+ + * | SPECIAL2 | rb |x| s12 | XRa |MXU__POOL09| + * +-----------+---------+-+-------------------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool09(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 5, 0); + + switch (opcode) { + case OPC_MXU_S32SDI: + /* TODO: Implement emulation of S32SDI instruction. */ + MIPS_INVAL("OPC_MXU_S32SDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32SDIR: + /* TODO: Implement emulation of S32SDIR instruction. */ + MIPS_INVAL("OPC_MXU_S32SDIR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool10 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rb | rc |st2|x x x x| XRa |MXU__POOL10| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool10(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 5, 0); + + switch (opcode) { + case OPC_MXU_S32LDIV: + /* TODO: Implement emulation of S32LDIV instruction. 
*/ + MIPS_INVAL("OPC_MXU_S32LDIV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32LDIVR: + /* TODO: Implement emulation of S32LDIVR instruction. */ + MIPS_INVAL("OPC_MXU_S32LDIVR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool11 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rb | rc |st2|x x x x| XRa |MXU__POOL11| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool11(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + switch (opcode) { + case OPC_MXU_S32SDIV: + /* TODO: Implement emulation of S32SDIV instruction. */ + MIPS_INVAL("OPC_MXU_S32SDIV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32SDIVR: + /* TODO: Implement emulation of S32SDIVR instruction. */ + MIPS_INVAL("OPC_MXU_S32SDIVR"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool12 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |an2|x x| Xd | XRc | XRb | XRa |MXU__POOL12| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool12(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_D32ACC: + /* TODO: Implement emulation of D32ACC instruction. */ + MIPS_INVAL("OPC_MXU_D32ACC"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32ACCM: + /* TODO: Implement emulation of D32ACCM instruction. */ + MIPS_INVAL("OPC_MXU_D32ACCM"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32ASUM: + /* TODO: Implement emulation of D32ASUM instruction. */ + MIPS_INVAL("OPC_MXU_D32ASUM"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool13 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |en2|x x|0 0 0 0| XRc | XRb | XRa |MXU__POOL13| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool13(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q16ACC: + /* TODO: Implement emulation of Q16ACC instruction. */ + MIPS_INVAL("OPC_MXU_Q16ACC"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16ACCM: + /* TODO: Implement emulation of Q16ACCM instruction. */ + MIPS_INVAL("OPC_MXU_Q16ACCM"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16ASUM: + /* TODO: Implement emulation of Q16ASUM instruction. 
*/ + MIPS_INVAL("OPC_MXU_Q16ASUM"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool14 + * + * Q8ADDE, Q8ACCE: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0|x x| XRd | XRc | XRb | XRa |MXU__POOL14| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + * D8SUM, D8SUMC: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |en2|x x|0 0 0 0| XRc | XRb | XRa |MXU__POOL14| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool14(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8ADDE: + /* TODO: Implement emulation of Q8ADDE instruction. */ + MIPS_INVAL("OPC_MXU_Q8ADDE"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D8SUM: + /* TODO: Implement emulation of D8SUM instruction. */ + MIPS_INVAL("OPC_MXU_D8SUM"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D8SUMC: + /* TODO: Implement emulation of D8SUMC instruction. */ + MIPS_INVAL("OPC_MXU_D8SUMC"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool15 + * + * S32MUL, S32MULU, S32EXTRV: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rs | rt |x x| XRd | XRa |MXU__POOL15| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + * S32EXTR: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+---------+---+-------+-------+-----------+ + * | SPECIAL2 | rb | sft5 |x x| XRd | XRa |MXU__POOL15| + * +-----------+---------+---------+---+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool15(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 14, 2); + + switch (opcode) { + case OPC_MXU_S32MUL: + /* TODO: Implement emulation of S32MUL instruction. */ + MIPS_INVAL("OPC_MXU_S32MUL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MULU: + /* TODO: Implement emulation of S32MULU instruction. */ + MIPS_INVAL("OPC_MXU_S32MULU"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32EXTR: + /* TODO: Implement emulation of S32EXTR instruction. */ + MIPS_INVAL("OPC_MXU_S32EXTR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32EXTRV: + /* TODO: Implement emulation of S32EXTRV instruction. 
*/ + MIPS_INVAL("OPC_MXU_S32EXTRV"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool16 + * + * D32SARW: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 | rb |x x x| XRc | XRb | XRa |MXU__POOL16| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + * S32ALN: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 | rs |x x x| XRc | XRb | XRa |MXU__POOL16| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + * S32ALNI: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+-----+---+-----+-------+-------+-------+-----------+ + * | SPECIAL2 | s3 |0 0|x x x| XRc | XRb | XRa |MXU__POOL16| + * +-----------+-----+---+-----+-------+-------+-------+-----------+ + * + * S32NOR, S32AND, S32OR, S32XOR: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0 0 0 0|x x x| XRc | XRb | XRa |MXU__POOL16| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + * S32LUI: + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+-----+---+-----+-------+---------------+-----------+ + * | SPECIAL2 |optn3|0 0|x x x| XRc | s8 |MXU__POOL16| + * +-----------+-----+---+-----+-------+---------------+-----------+ + * + */ +static void decode_opc_mxu__pool16(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_D32SARW: + /* TODO: Implement emulation of D32SARW instruction. */ + MIPS_INVAL("OPC_MXU_D32SARW"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32ALN: + /* TODO: Implement emulation of S32ALN instruction. */ + MIPS_INVAL("OPC_MXU_S32ALN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32ALNI: + /* TODO: Implement emulation of S32ALNI instruction. */ + MIPS_INVAL("OPC_MXU_S32ALNI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32NOR: + /* TODO: Implement emulation of S32NOR instruction. */ + MIPS_INVAL("OPC_MXU_S32NOR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32AND: + /* TODO: Implement emulation of S32AND instruction. */ + MIPS_INVAL("OPC_MXU_S32AND"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32OR: + /* TODO: Implement emulation of S32OR instruction. */ + MIPS_INVAL("OPC_MXU_S32OR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32XOR: + /* TODO: Implement emulation of S32XOR instruction. */ + MIPS_INVAL("OPC_MXU_S32XOR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32LUI: + /* TODO: Implement emulation of S32LUI instruction. 
*/ + MIPS_INVAL("OPC_MXU_S32LUI"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool17 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 | rb |x x x| XRd | XRa |0 0 0 0|MXU__POOL17| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool17(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_D32SLLV: + /* TODO: Implement emulation of D32SLLV instruction. */ + MIPS_INVAL("OPC_MXU_D32SLLV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SLRV: + /* TODO: Implement emulation of D32SLRV instruction. */ + MIPS_INVAL("OPC_MXU_D32SLRV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SARV: + /* TODO: Implement emulation of D32SARV instruction. */ + MIPS_INVAL("OPC_MXU_D32SARV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SLLV: + /* TODO: Implement emulation of Q16SLLV instruction. */ + MIPS_INVAL("OPC_MXU_Q16SLLV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SLRV: + /* TODO: Implement emulation of Q16SLRV instruction. */ + MIPS_INVAL("OPC_MXU_Q16SLRV"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SARV: + /* TODO: Implement emulation of Q16SARV instruction. */ + MIPS_INVAL("OPC_MXU_Q16SARV"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool18 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0|x x| XRd | XRc | XRb | XRa |MXU__POOL18| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool18(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8MUL: + case OPC_MXU_Q8MULSU: + gen_mxu_q8mul_q8mulsu(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool19 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------+-----+-------+-------+-------+-----------+ + * | SPECIAL2 |0 0 0 0 0|x x x| XRc | XRb | XRa |MXU__POOL19| + * +-----------+---------+-----+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool19(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_Q8MOVZ: + /* TODO: Implement emulation of Q8MOVZ instruction. */ + MIPS_INVAL("OPC_MXU_Q8MOVZ"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8MOVN: + /* TODO: Implement emulation of Q8MOVN instruction. */ + MIPS_INVAL("OPC_MXU_Q8MOVN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MOVZ: + /* TODO: Implement emulation of D16MOVZ instruction. */ + MIPS_INVAL("OPC_MXU_D16MOVZ"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MOVN: + /* TODO: Implement emulation of D16MOVN instruction. 
*/ + MIPS_INVAL("OPC_MXU_D16MOVN"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MOVZ: + /* TODO: Implement emulation of S32MOVZ instruction. */ + MIPS_INVAL("OPC_MXU_S32MOVZ"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MOVN: + /* TODO: Implement emulation of S32MOVN instruction. */ + MIPS_INVAL("OPC_MXU_S32MOVN"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +/* + * + * Decode MXU pool20 + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * | SPECIAL2 |an2|x x| XRd | XRc | XRb | XRa |MXU__POOL20| + * +-----------+---+---+-------+-------+-------+-------+-----------+ + * + */ +static void decode_opc_mxu__pool20(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8MAC: + /* TODO: Implement emulation of Q8MAC instruction. */ + MIPS_INVAL("OPC_MXU_Q8MAC"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8MACSU: + /* TODO: Implement emulation of Q8MACSU instruction. */ + MIPS_INVAL("OPC_MXU_Q8MACSU"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + + +/* + * Main MXU decoding function + * + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-----------+---------------------------------------+-----------+ + * | SPECIAL2 | |x x x x x x| + * +-----------+---------------------------------------+-----------+ + * + */ +static void decode_opc_mxu(CPUMIPSState *env, DisasContext *ctx) +{ + /* + * TODO: Investigate necessity of including handling of + * CLZ, CLO, SDBB in this function, as they belong to + * SPECIAL2 opcode space for regular pre-R6 MIPS ISAs. + */ + uint32_t opcode = extract32(ctx->opcode, 0, 6); + + if (opcode == OPC__MXU_MUL) { + uint32_t rs, rt, rd, op1; + + rs = extract32(ctx->opcode, 21, 5); + rt = extract32(ctx->opcode, 16, 5); + rd = extract32(ctx->opcode, 11, 5); + op1 = MASK_SPECIAL2(ctx->opcode); + + gen_arith(ctx, op1, rd, rs, rt); + + return; + } + + if (opcode == OPC_MXU_S32M2I) { + gen_mxu_s32m2i(ctx); + return; + } + + if (opcode == OPC_MXU_S32I2M) { + gen_mxu_s32i2m(ctx); + return; + } + + { + TCGv t_mxu_cr = tcg_temp_new(); + TCGLabel *l_exit = gen_new_label(); + + gen_load_mxu_cr(t_mxu_cr); + tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN); + tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit); + + switch (opcode) { + case OPC_MXU_S32MADD: + /* TODO: Implement emulation of S32MADD instruction. */ + MIPS_INVAL("OPC_MXU_S32MADD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MADDU: + /* TODO: Implement emulation of S32MADDU instruction. */ + MIPS_INVAL("OPC_MXU_S32MADDU"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL00: + decode_opc_mxu__pool00(env, ctx); + break; + case OPC_MXU_S32MSUB: + /* TODO: Implement emulation of S32MSUB instruction. */ + MIPS_INVAL("OPC_MXU_S32MSUB"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32MSUBU: + /* TODO: Implement emulation of S32MSUBU instruction. 
*/ + MIPS_INVAL("OPC_MXU_S32MSUBU"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL01: + decode_opc_mxu__pool01(env, ctx); + break; + case OPC_MXU__POOL02: + decode_opc_mxu__pool02(env, ctx); + break; + case OPC_MXU_D16MUL: + gen_mxu_d16mul(ctx); + break; + case OPC_MXU__POOL03: + decode_opc_mxu__pool03(env, ctx); + break; + case OPC_MXU_D16MAC: + gen_mxu_d16mac(ctx); + break; + case OPC_MXU_D16MACF: + /* TODO: Implement emulation of D16MACF instruction. */ + MIPS_INVAL("OPC_MXU_D16MACF"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MADL: + /* TODO: Implement emulation of D16MADL instruction. */ + MIPS_INVAL("OPC_MXU_D16MADL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S16MAD: + /* TODO: Implement emulation of S16MAD instruction. */ + MIPS_INVAL("OPC_MXU_S16MAD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16ADD: + /* TODO: Implement emulation of Q16ADD instruction. */ + MIPS_INVAL("OPC_MXU_Q16ADD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D16MACE: + /* TODO: Implement emulation of D16MACE instruction. */ + MIPS_INVAL("OPC_MXU_D16MACE"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL04: + decode_opc_mxu__pool04(env, ctx); + break; + case OPC_MXU__POOL05: + decode_opc_mxu__pool05(env, ctx); + break; + case OPC_MXU__POOL06: + decode_opc_mxu__pool06(env, ctx); + break; + case OPC_MXU__POOL07: + decode_opc_mxu__pool07(env, ctx); + break; + case OPC_MXU__POOL08: + decode_opc_mxu__pool08(env, ctx); + break; + case OPC_MXU__POOL09: + decode_opc_mxu__pool09(env, ctx); + break; + case OPC_MXU__POOL10: + decode_opc_mxu__pool10(env, ctx); + break; + case OPC_MXU__POOL11: + decode_opc_mxu__pool11(env, ctx); + break; + case OPC_MXU_D32ADD: + /* TODO: Implement emulation of D32ADD instruction. */ + MIPS_INVAL("OPC_MXU_D32ADD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL12: + decode_opc_mxu__pool12(env, ctx); + break; + case OPC_MXU__POOL13: + decode_opc_mxu__pool13(env, ctx); + break; + case OPC_MXU__POOL14: + decode_opc_mxu__pool14(env, ctx); + break; + case OPC_MXU_Q8ACCE: + /* TODO: Implement emulation of Q8ACCE instruction. */ + MIPS_INVAL("OPC_MXU_Q8ACCE"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S8LDD: + gen_mxu_s8ldd(ctx); + break; + case OPC_MXU_S8STD: + /* TODO: Implement emulation of S8STD instruction. */ + MIPS_INVAL("OPC_MXU_S8STD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S8LDI: + /* TODO: Implement emulation of S8LDI instruction. */ + MIPS_INVAL("OPC_MXU_S8LDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S8SDI: + /* TODO: Implement emulation of S8SDI instruction. */ + MIPS_INVAL("OPC_MXU_S8SDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL15: + decode_opc_mxu__pool15(env, ctx); + break; + case OPC_MXU__POOL16: + decode_opc_mxu__pool16(env, ctx); + break; + case OPC_MXU_LXB: + /* TODO: Implement emulation of LXB instruction. */ + MIPS_INVAL("OPC_MXU_LXB"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S16LDD: + /* TODO: Implement emulation of S16LDD instruction. */ + MIPS_INVAL("OPC_MXU_S16LDD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S16STD: + /* TODO: Implement emulation of S16STD instruction. */ + MIPS_INVAL("OPC_MXU_S16STD"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S16LDI: + /* TODO: Implement emulation of S16LDI instruction. 
*/ + MIPS_INVAL("OPC_MXU_S16LDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S16SDI: + /* TODO: Implement emulation of S16SDI instruction. */ + MIPS_INVAL("OPC_MXU_S16SDI"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SLL: + /* TODO: Implement emulation of D32SLL instruction. */ + MIPS_INVAL("OPC_MXU_D32SLL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SLR: + /* TODO: Implement emulation of D32SLR instruction. */ + MIPS_INVAL("OPC_MXU_D32SLR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SARL: + /* TODO: Implement emulation of D32SARL instruction. */ + MIPS_INVAL("OPC_MXU_D32SARL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_D32SAR: + /* TODO: Implement emulation of D32SAR instruction. */ + MIPS_INVAL("OPC_MXU_D32SAR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SLL: + /* TODO: Implement emulation of Q16SLL instruction. */ + MIPS_INVAL("OPC_MXU_Q16SLL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q16SLR: + /* TODO: Implement emulation of Q16SLR instruction. */ + MIPS_INVAL("OPC_MXU_Q16SLR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL17: + decode_opc_mxu__pool17(env, ctx); + break; + case OPC_MXU_Q16SAR: + /* TODO: Implement emulation of Q16SAR instruction. */ + MIPS_INVAL("OPC_MXU_Q16SAR"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU__POOL18: + decode_opc_mxu__pool18(env, ctx); + break; + case OPC_MXU__POOL19: + decode_opc_mxu__pool19(env, ctx); + break; + case OPC_MXU__POOL20: + decode_opc_mxu__pool20(env, ctx); + break; + case OPC_MXU_Q16SCOP: + /* TODO: Implement emulation of Q16SCOP instruction. */ + MIPS_INVAL("OPC_MXU_Q16SCOP"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8MADL: + /* TODO: Implement emulation of Q8MADL instruction. */ + MIPS_INVAL("OPC_MXU_Q8MADL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_S32SFL: + /* TODO: Implement emulation of S32SFL instruction. */ + MIPS_INVAL("OPC_MXU_S32SFL"); + generate_exception_end(ctx, EXCP_RI); + break; + case OPC_MXU_Q8SAD: + /* TODO: Implement emulation of Q8SAD instruction. */ + MIPS_INVAL("OPC_MXU_Q8SAD"); + generate_exception_end(ctx, EXCP_RI); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + generate_exception_end(ctx, EXCP_RI); + } + + gen_set_label(l_exit); + tcg_temp_free(t_mxu_cr); + } +} + + static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) { int rs, rt, rd; @@ -22738,7 +25758,9 @@ static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_BSHFL(ctx->opcode); switch (op2) { case OPC_ALIGN: - case OPC_ALIGN_END: + case OPC_ALIGN_1: + case OPC_ALIGN_2: + case OPC_ALIGN_3: gen_align(ctx, 32, rd, rs, rt, sa & 3); break; case OPC_BITSWAP: @@ -22764,7 +25786,13 @@ static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_DBSHFL(ctx->opcode); switch (op2) { case OPC_DALIGN: - case OPC_DALIGN_END: + case OPC_DALIGN_1: + case OPC_DALIGN_2: + case OPC_DALIGN_3: + case OPC_DALIGN_4: + case OPC_DALIGN_5: + case OPC_DALIGN_6: + case OPC_DALIGN_7: gen_align(ctx, 64, rd, rs, rt, sa & 7); break; case OPC_DBITSWAP: @@ -22801,7 +25829,7 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2E: /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. 
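* In other words, the decoder below disambiguates on the ASE flags rather than on the encoding itself: when the DSP R2 ASE is present the word is re-masked with MASK_ADDUH_QB() and dispatched as a DSP operation, otherwise it is handled as a Loongson 2E integer multiply.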
*/ - if ((ctx->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) { + if ((ctx->insn_flags & ASE_DSP_R2) && (op1 == OPC_MULT_G_2E)) { op2 = MASK_ADDUH_QB(ctx->opcode); switch (op2) { case OPC_ADDUH_QB: @@ -23308,6 +26336,250 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) } } +static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI0(ctx->opcode); + + switch (opc) { + case TX79_MMI0_PADDW: /* TODO: TX79_MMI0_PADDW */ + case TX79_MMI0_PSUBW: /* TODO: TX79_MMI0_PSUBW */ + case TX79_MMI0_PCGTW: /* TODO: TX79_MMI0_PCGTW */ + case TX79_MMI0_PMAXW: /* TODO: TX79_MMI0_PMAXW */ + case TX79_MMI0_PADDH: /* TODO: TX79_MMI0_PADDH */ + case TX79_MMI0_PSUBH: /* TODO: TX79_MMI0_PSUBH */ + case TX79_MMI0_PCGTH: /* TODO: TX79_MMI0_PCGTH */ + case TX79_MMI0_PMAXH: /* TODO: TX79_MMI0_PMAXH */ + case TX79_MMI0_PADDB: /* TODO: TX79_MMI0_PADDB */ + case TX79_MMI0_PSUBB: /* TODO: TX79_MMI0_PSUBB */ + case TX79_MMI0_PCGTB: /* TODO: TX79_MMI0_PCGTB */ + case TX79_MMI0_PADDSW: /* TODO: TX79_MMI0_PADDSW */ + case TX79_MMI0_PSUBSW: /* TODO: TX79_MMI0_PSUBSW */ + case TX79_MMI0_PEXTLW: /* TODO: TX79_MMI0_PEXTLW */ + case TX79_MMI0_PPACW: /* TODO: TX79_MMI0_PPACW */ + case TX79_MMI0_PADDSH: /* TODO: TX79_MMI0_PADDSH */ + case TX79_MMI0_PSUBSH: /* TODO: TX79_MMI0_PSUBSH */ + case TX79_MMI0_PEXTLH: /* TODO: TX79_MMI0_PEXTLH */ + case TX79_MMI0_PPACH: /* TODO: TX79_MMI0_PPACH */ + case TX79_MMI0_PADDSB: /* TODO: TX79_MMI0_PADDSB */ + case TX79_MMI0_PSUBSB: /* TODO: TX79_MMI0_PSUBSB */ + case TX79_MMI0_PEXTLB: /* TODO: TX79_MMI0_PEXTLB */ + case TX79_MMI0_PPACB: /* TODO: TX79_MMI0_PPACB */ + case TX79_MMI0_PEXT5: /* TODO: TX79_MMI0_PEXT5 */ + case TX79_MMI0_PPAC5: /* TODO: TX79_MMI0_PPAC5 */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI0 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI0"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI1(ctx->opcode); + + switch (opc) { + case TX79_MMI1_PABSW: /* TODO: TX79_MMI1_PABSW */ + case TX79_MMI1_PCEQW: /* TODO: TX79_MMI1_PCEQW */ + case TX79_MMI1_PMINW: /* TODO: TX79_MMI1_PMINW */ + case TX79_MMI1_PADSBH: /* TODO: TX79_MMI1_PADSBH */ + case TX79_MMI1_PABSH: /* TODO: TX79_MMI1_PABSH */ + case TX79_MMI1_PCEQH: /* TODO: TX79_MMI1_PCEQH */ + case TX79_MMI1_PMINH: /* TODO: TX79_MMI1_PMINH */ + case TX79_MMI1_PCEQB: /* TODO: TX79_MMI1_PCEQB */ + case TX79_MMI1_PADDUW: /* TODO: TX79_MMI1_PADDUW */ + case TX79_MMI1_PSUBUW: /* TODO: TX79_MMI1_PSUBUW */ + case TX79_MMI1_PEXTUW: /* TODO: TX79_MMI1_PEXTUW */ + case TX79_MMI1_PADDUH: /* TODO: TX79_MMI1_PADDUH */ + case TX79_MMI1_PSUBUH: /* TODO: TX79_MMI1_PSUBUH */ + case TX79_MMI1_PEXTUH: /* TODO: TX79_MMI1_PEXTUH */ + case TX79_MMI1_PADDUB: /* TODO: TX79_MMI1_PADDUB */ + case TX79_MMI1_PSUBUB: /* TODO: TX79_MMI1_PSUBUB */ + case TX79_MMI1_PEXTUB: /* TODO: TX79_MMI1_PEXTUB */ + case TX79_MMI1_QFSRV: /* TODO: TX79_MMI1_QFSRV */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI1 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI1"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI2(ctx->opcode); + + switch (opc) { + case TX79_MMI2_PMADDW: /* TODO: TX79_MMI2_PMADDW */ + case TX79_MMI2_PSLLVW: /* TODO: TX79_MMI2_PSLLVW */ + case TX79_MMI2_PSRLVW: /* TODO: TX79_MMI2_PSRLVW */ + case TX79_MMI2_PMSUBW: /* 
TODO: TX79_MMI2_PMSUBW */ + case TX79_MMI2_PMFHI: /* TODO: TX79_MMI2_PMFHI */ + case TX79_MMI2_PMFLO: /* TODO: TX79_MMI2_PMFLO */ + case TX79_MMI2_PINTH: /* TODO: TX79_MMI2_PINTH */ + case TX79_MMI2_PMULTW: /* TODO: TX79_MMI2_PMULTW */ + case TX79_MMI2_PDIVW: /* TODO: TX79_MMI2_PDIVW */ + case TX79_MMI2_PCPYLD: /* TODO: TX79_MMI2_PCPYLD */ + case TX79_MMI2_PMADDH: /* TODO: TX79_MMI2_PMADDH */ + case TX79_MMI2_PHMADH: /* TODO: TX79_MMI2_PHMADH */ + case TX79_MMI2_PAND: /* TODO: TX79_MMI2_PAND */ + case TX79_MMI2_PXOR: /* TODO: TX79_MMI2_PXOR */ + case TX79_MMI2_PMSUBH: /* TODO: TX79_MMI2_PMSUBH */ + case TX79_MMI2_PHMSBH: /* TODO: TX79_MMI2_PHMSBH */ + case TX79_MMI2_PEXEH: /* TODO: TX79_MMI2_PEXEH */ + case TX79_MMI2_PREVH: /* TODO: TX79_MMI2_PREVH */ + case TX79_MMI2_PMULTH: /* TODO: TX79_MMI2_PMULTH */ + case TX79_MMI2_PDIVBW: /* TODO: TX79_MMI2_PDIVBW */ + case TX79_MMI2_PEXEW: /* TODO: TX79_MMI2_PEXEW */ + case TX79_MMI2_PROT3W: /* TODO: TX79_MMI2_PROT3W */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI2 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI2"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI3(ctx->opcode); + + switch (opc) { + case TX79_MMI3_PMADDUW: /* TODO: TX79_MMI3_PMADDUW */ + case TX79_MMI3_PSRAVW: /* TODO: TX79_MMI3_PSRAVW */ + case TX79_MMI3_PMTHI: /* TODO: TX79_MMI3_PMTHI */ + case TX79_MMI3_PMTLO: /* TODO: TX79_MMI3_PMTLO */ + case TX79_MMI3_PINTEH: /* TODO: TX79_MMI3_PINTEH */ + case TX79_MMI3_PMULTUW: /* TODO: TX79_MMI3_PMULTUW */ + case TX79_MMI3_PDIVUW: /* TODO: TX79_MMI3_PDIVUW */ + case TX79_MMI3_PCPYUD: /* TODO: TX79_MMI3_PCPYUD */ + case TX79_MMI3_POR: /* TODO: TX79_MMI3_POR */ + case TX79_MMI3_PNOR: /* TODO: TX79_MMI3_PNOR */ + case TX79_MMI3_PEXCH: /* TODO: TX79_MMI3_PEXCH */ + case TX79_MMI3_PCPYH: /* TODO: TX79_MMI3_PCPYH */ + case TX79_MMI3_PEXCW: /* TODO: TX79_MMI3_PEXCW */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI3 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI3"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI(ctx->opcode); + int rs = extract32(ctx->opcode, 21, 5); + int rt = extract32(ctx->opcode, 16, 5); + int rd = extract32(ctx->opcode, 11, 5); + + switch (opc) { + case TX79_MMI_CLASS_MMI0: + decode_tx79_mmi0(env, ctx); + break; + case TX79_MMI_CLASS_MMI1: + decode_tx79_mmi1(env, ctx); + break; + case TX79_MMI_CLASS_MMI2: + decode_tx79_mmi2(env, ctx); + break; + case TX79_MMI_CLASS_MMI3: + decode_tx79_mmi3(env, ctx); + break; + case TX79_MMI_MULT1: + case TX79_MMI_MULTU1: + gen_mul_txx9(ctx, opc, rd, rs, rt); + break; + case TX79_MMI_DIV1: + case TX79_MMI_DIVU1: + gen_muldiv(ctx, opc, 1, rs, rt); + break; + case TX79_MMI_MTLO1: + case TX79_MMI_MTHI1: + gen_HILO(ctx, opc, 1, rs); + break; + case TX79_MMI_MFLO1: + case TX79_MMI_MFHI1: + gen_HILO(ctx, opc, 1, rd); + break; + case TX79_MMI_MADD: /* TODO: TX79_MMI_MADD */ + case TX79_MMI_MADDU: /* TODO: TX79_MMI_MADDU */ + case TX79_MMI_PLZCW: /* TODO: TX79_MMI_PLZCW */ + case TX79_MMI_MADD1: /* TODO: TX79_MMI_MADD1 */ + case TX79_MMI_MADDU1: /* TODO: TX79_MMI_MADDU1 */ + case TX79_MMI_PMFHL: /* TODO: TX79_MMI_PMFHL */ + case TX79_MMI_PMTHL: /* TODO: TX79_MMI_PMTHL */ + case TX79_MMI_PSLLH: /* TODO: TX79_MMI_PSLLH */ + case TX79_MMI_PSRLH: /* TODO: TX79_MMI_PSRLH */ + case TX79_MMI_PSRAH: /* TODO: TX79_MMI_PSRAH */ + 
case TX79_MMI_PSLLW: /* TODO: TX79_MMI_PSLLW */ + case TX79_MMI_PSRLW: /* TODO: TX79_MMI_PSRLW */ + case TX79_MMI_PSRAW: /* TODO: TX79_MMI_PSRAW */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_CLASS_MMI */ + break; + default: + MIPS_INVAL("TX79 MMI class"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_lq(CPUMIPSState *env, DisasContext *ctx) +{ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_LQ */ +} + +static void gen_tx79_sq(DisasContext *ctx, int base, int rt, int offset) +{ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_SQ */ +} + +/* + * The TX79-specific instruction Store Quadword + * + * +--------+-------+-------+------------------------+ + * | 011111 | base | rt | offset | SQ + * +--------+-------+-------+------------------------+ + * 6 5 5 16 + * + * has the same opcode as the Read Hardware Register instruction + * + * +--------+-------+-------+-------+-------+--------+ + * | 011111 | 00000 | rt | rd | 00000 | 111011 | RDHWR + * +--------+-------+-------+-------+-------+--------+ + * 6 5 5 5 5 6 + * + * that is required, trapped and emulated by the Linux kernel. However, all + * RDHWR encodings yield address error exceptions on the TX79 since the SQ + * offset is odd. Therefore all valid SQ instructions can execute normally. + * In user mode, QEMU must verify the upper and lower 11 bits to distinguish + * between SQ and RDHWR, as the Linux kernel does. + */ +static void decode_tx79_sq(CPUMIPSState *env, DisasContext *ctx) +{ + int base = extract32(ctx->opcode, 21, 5); + int rt = extract32(ctx->opcode, 16, 5); + int offset = extract32(ctx->opcode, 0, 16); + +#ifdef CONFIG_USER_ONLY + uint32_t op1 = MASK_SPECIAL3(ctx->opcode); + uint32_t op2 = extract32(ctx->opcode, 6, 5); + + if (base == 0 && op2 == 0 && op1 == OPC_RDHWR) { + int rd = extract32(ctx->opcode, 11, 5); + + gen_rdhwr(ctx, rt, rd, 0); + return; + } +#endif + + gen_tx79_sq(ctx, base, rt, offset); +} + static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx) { int rs, rt, rd, sa; @@ -23380,7 +26652,9 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_BSHFL(ctx->opcode); switch (op2) { case OPC_ALIGN: - case OPC_ALIGN_END: + case OPC_ALIGN_1: + case OPC_ALIGN_2: + case OPC_ALIGN_3: case OPC_BITSWAP: check_insn(ctx, ISA_MIPS32R6); decode_opc_special3_r6(env, ctx); @@ -23406,7 +26680,13 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_DBSHFL(ctx->opcode); switch (op2) { case OPC_DALIGN: - case OPC_DALIGN_END: + case OPC_DALIGN_1: + case OPC_DALIGN_2: + case OPC_DALIGN_3: + case OPC_DALIGN_4: + case OPC_DALIGN_5: + case OPC_DALIGN_6: + case OPC_DALIGN_7: case OPC_DBITSWAP: check_insn(ctx, ISA_MIPS32R6); decode_opc_special3_r6(env, ctx); @@ -24605,10 +27885,20 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) decode_opc_special(env, ctx); break; case OPC_SPECIAL2: - decode_opc_special2_legacy(env, ctx); + if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) { + decode_tx79_mmi(env, ctx); + } else if (ctx->insn_flags & ASE_MXU) { + decode_opc_mxu(env, ctx); + } else { + decode_opc_special2_legacy(env, ctx); + } break; case OPC_SPECIAL3: - decode_opc_special3(env, ctx); + if (ctx->insn_flags & INSN_R5900) { + decode_tx79_sq(env, ctx); /* TX79_SQ */ + } else { + decode_opc_special3(env, ctx); + } break; case OPC_REGIMM: op1 = MASK_REGIMM(ctx->opcode); @@ -24895,6 +28185,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) break; case OPC_LL: /* Load 
and stores */ check_insn(ctx, ISA_MIPS2); + check_insn_opc_user_only(ctx, INSN_R5900); /* Fallthrough */ case OPC_LWL: case OPC_LWR: @@ -24920,6 +28211,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) case OPC_SC: check_insn(ctx, ISA_MIPS2); check_insn_opc_removed(ctx, ISA_MIPS32R6); + check_insn_opc_user_only(ctx, INSN_R5900); gen_st_cond(ctx, op, rt, rs, imm); break; case OPC_CACHE: @@ -24933,7 +28225,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) break; case OPC_PREF: check_insn_opc_removed(ctx, ISA_MIPS32R6); - check_insn(ctx, ISA_MIPS4 | ISA_MIPS32); + check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 | + INSN_R5900); /* Treat as NOP. */ break; @@ -25185,9 +28478,11 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) #if defined(TARGET_MIPS64) /* MIPS64 opcodes */ + case OPC_LLD: + check_insn_opc_user_only(ctx, INSN_R5900); + /* fall through */ case OPC_LDL: case OPC_LDR: - case OPC_LLD: check_insn_opc_removed(ctx, ISA_MIPS32R6); /* fall through */ case OPC_LWU: @@ -25208,6 +28503,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) case OPC_SCD: check_insn_opc_removed(ctx, ISA_MIPS32R6); check_insn(ctx, ISA_MIPS3); + check_insn_opc_user_only(ctx, INSN_R5900); check_mips_64(ctx); gen_st_cond(ctx, op, rt, rs, imm); break; @@ -25262,8 +28558,12 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_MSA: /* OPC_MDMX */ - /* MDMX: Not implemented. */ - gen_msa(env, ctx); + if (ctx->insn_flags & INSN_R5900) { + decode_tx79_lq(env, ctx); /* TX79_LQ */ + } else { + /* MDMX: Not implemented. */ + gen_msa(env, ctx); + } break; case OPC_PCREL: check_insn(ctx, ISA_MIPS32R6); @@ -25285,6 +28585,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->saved_pc = -1; ctx->insn_flags = env->insn_flags; ctx->CP0_Config1 = env->CP0_Config1; + ctx->CP0_Config2 = env->CP0_Config2; ctx->CP0_Config3 = env->CP0_Config3; ctx->CP0_Config5 = env->CP0_Config5; ctx->btarget = 0; @@ -25585,6 +28886,17 @@ void mips_tcg_init(void) fpu_fcr31 = tcg_global_mem_new_i32(cpu_env, offsetof(CPUMIPSState, active_fpu.fcr31), "fcr31"); + + for (i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) { + mxu_gpr[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUMIPSState, + active_tc.mxu_gpr[i]), + mxuregnames[i]); + } + + mxu_CR = tcg_global_mem_new(cpu_env, + offsetof(CPUMIPSState, active_tc.mxu_cr), + mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]); } #include "translate_init.inc.c" @@ -25799,6 +29111,24 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Status |= (1 << CP0St_FR); } + if (env->insn_flags & ISA_MIPS32R6) { + /* PTW = 1 */ + env->CP0_PWSize = 0x40; + /* GDI = 12 */ + /* UDI = 12 */ + /* MDI = 12 */ + /* PRI = 12 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x0C30C302; + } else { + /* GDI = 0 */ + /* UDI = 0 */ + /* MDI = 0 */ + /* PRI = 0 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x02; + } + if (env->CP0_Config3 & (1 << CP0C3_ISA) & (1 << (CP0C3_ISA + 1))) { /* microMIPS on reset when Config3.ISA is 3 */ env->hflags |= MIPS_HFLAG_M16; diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c index b3320b9dc7..85da4a269c 100644 --- a/target/mips/translate_init.inc.c +++ b/target/mips/translate_init.inc.c @@ -320,7 +320,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, { @@ -411,6 +411,65 @@ 
const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { + /* + * The Toshiba TX System RISC TX79 Core Architecture manual + * + * https://wiki.qemu.org/File:C790.pdf + * + * describes the C790 processor that is a follow-up to the R5900. + * There are a few notable differences in that the R5900 FPU + * + * - is not IEEE 754-1985 compliant, + * - does not implement double format, and + * - its machine code is nonstandard. + */ + .name = "R5900", + .CP0_PRid = 0x00002E00, + /* No L2 cache, icache size 32k, dcache size 32k, uncached coherency. */ + .CP0_Config0 = (0x3 << 9) | (0x3 << 6) | (0x2 << CP0C0_K0), + .CP0_Status_rw_bitmask = 0xF4C79C1F, +#ifdef CONFIG_USER_ONLY + /* + * R5900 hardware traps to the Linux kernel for IEEE 754-1985 and LL/SC + * emulation. For user only, QEMU is the kernel, so we emulate the traps + * by simply emulating the instructions directly. + * + * Note: Config1 is only used internally, the R5900 has only Config0. + */ + .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU), + .CP0_LLAddr_rw_bitmask = 0xFFFFFFFF, + .CP0_LLAddr_shift = 4, + .CP1_fcr0 = (0x38 << FCR0_PRID) | (0x0 << FCR0_REV), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0x0183FFFF, +#else + /* + * The R5900 COP1 FPU implements single-precision floating-point + * operations but is not entirely IEEE 754-1985 compatible. In + * particular, + * + * - NaN (not a number) and +/- infinities are not supported; + * - exception mechanisms are not fully supported; + * - denormalized numbers are not supported; + * - rounding towards nearest and +/- infinities are not supported; + * - computed results usually differs in the least significant bit; + * - saturations can differ more than the least significant bit. + * + * Since only rounding towards zero is supported, the two least + * significant bits of FCR31 are hardwired to 01. + * + * FPU emulation is disabled here until it is implemented. + * + * Note: Config1 is only used internally, the R5900 has only Config0. + */ + .CP0_Config1 = (47 << CP0C1_MMU), +#endif /* !CONFIG_USER_ONLY */ + .SEGBITS = 32, + .PABITS = 32, + .insn_flags = CPU_R5900 | ASE_MMI, + .mmu_type = MMU_TYPE_R4000, + }, + { /* A generic CPU supporting MIPS32 Release 6 ISA. FIXME: Support IEEE 754-2008 FP. Eventually this should be replaced by a real CPU model. */ @@ -485,7 +544,8 @@ const mips_def_t mips_defs[] = .CP1_fcr31 = (1 << FCR31_ABS2008) | (1 << FCR31_NAN2008), .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSPR2 | ASE_MT, + .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSP_R2 | ASE_DSP_R3 | + ASE_MT, .mmu_type = MMU_TYPE_R4000, }, #if defined(TARGET_MIPS64) @@ -761,7 +821,7 @@ const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { - /* A generic CPU providing MIPS64 ASE DSP 2 features. + /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. 
*/ .name = "mips64dspr2", .CP0_PRid = 0x00010000, @@ -786,7 +846,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 42, .PABITS = 36, - .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ef64248bc4..7a1481fd0b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 8f0d86d104..a1485fad9b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "tcg.h" #include "internal.h" +#include "qemu/atomic128.h" //#define DEBUG_OP @@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, return i; } -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); } void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); } @@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; @@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. 
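* When the host cannot provide a 128-bit cmpxchg, gen_stqcx_() emits gen_helper_exit_atomic() instead of calling this helper, so asserting HAVE_CMPXCHG128 here is safe.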
*/ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 881743571b..4e59dd5f42 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -33,6 +33,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" #define CPU_SINGLE_STEP 0x1 @@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); -#else - /* Restart with exclusive lock. 
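* gen_helper_exit_atomic() raises EXCP_ATOMIC; the access is then replayed by cpu_exec_step_atomic() with all other vCPUs stopped, so it no longer needs to be atomic.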
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + tcg_temp_free(EA); + return; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; - tcg_temp_free(EA); - return; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); tcg_gen_mov_tl(cpu_reserve, EA); @@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); -#ifdef CONFIG_ATOMIC128 - if (ctx->le_mode) { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + if (HAVE_CMPXCHG128) { + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); + if (ctx->le_mode) { + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } else { + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif tcg_temp_free(EA); - tcg_temp_free_i32(oi); } else { TCGLabel *lab_fail = gen_new_label(); TCGLabel *lab_over = gen_new_label(); diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 263e63cb03..ee9432eb15 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8381,8 +8381,8 @@ static void getset_compat_deprecated(Object *obj, Visitor *v, const char *name, QNull *null = NULL; if (!qtest_enabled()) { - error_report("CPU 'compat' property is deprecated and has no effect; " - "use max-cpu-compat machine property instead"); + warn_report("CPU 'compat' property is deprecated and has no effect; " + "use max-cpu-compat machine property instead"); } visit_type_null(v, name, &null, NULL); qobject_unref(null); diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs index abd0a7cde3..fcc5d34c1f 100644 --- a/target/riscv/Makefile.objs +++ b/target/riscv/Makefile.objs @@ -1 +1 @@ -obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o gdbstub.o pmp.o +obj-y += translate.o op_helper.o cpu_helper.o cpu.o fpu_helper.o gdbstub.o pmp.o diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index d630e8fd6c..a025a0a3ba 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -74,8 +74,10 @@ const char * const riscv_intr_names[] = { "s_external", "h_external", "m_external", - "coprocessor", - "host" + "reserved", + "reserved", + "reserved", + "reserved" }; typedef struct RISCVCPUInfo { diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index d4f36295f0..4ee09b9cff 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -126,13 +126,18 @@ struct CPURISCVState { target_ulong mhartid; target_ulong mstatus; + /* * CAUTION! Unlike the rest of this struct, mip is accessed asynchronously - * by I/O threads and other vCPUs, so hold the iothread mutex before - * operating on it. CPU_INTERRUPT_HARD should be in effect iff this is - * non-zero. Use riscv_cpu_set_local_interrupt. + * by I/O threads. It should be read with atomic_read. It should be updated + * using riscv_cpu_update_mip with the iothread mutex held. The iothread + * mutex must be held because mip must be consistent with the CPU interrupt + * state. riscv_cpu_update_mip calls cpu_interrupt or cpu_reset_interrupt + * with the invariant that CPU_INTERRUPT_HARD is set iff mip is non-zero. + * mip is 32-bits to allow atomic_read on 32-bit hosts.
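* For example, a timer or interrupt-controller model (a hypothetical caller,
* not part of this patch) raises or lowers the machine-timer pending bit,
* with the iothread mutex held, as:
*
*     riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(level));
*
* where BOOL_TO_MASK() expands the boolean level into an all-ones or
* all-zeroes value mask.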
*/ - uint32_t mip; /* allow atomic_read for >= 32-bit hosts */ + uint32_t mip; + target_ulong mie; target_ulong mideleg; @@ -247,7 +252,6 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr, uintptr_t retaddr); int riscv_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size, int rw, int mmu_idx); - char *riscv_isa_string(RISCVCPU *cpu); void riscv_cpu_list(FILE *f, fprintf_function cpu_fprintf); @@ -255,6 +259,10 @@ void riscv_cpu_list(FILE *f, fprintf_function cpu_fprintf); #define cpu_list riscv_cpu_list #define cpu_mmu_index riscv_cpu_mmu_index +#ifndef CONFIG_USER_ONLY +uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value); +#define BOOL_TO_MASK(x) (-!!(x)) /* helper for riscv_cpu_update_mip value */ +#endif void riscv_set_mode(CPURISCVState *env, target_ulong newpriv); void riscv_translate_init(void); @@ -285,10 +293,6 @@ void csr_write_helper(CPURISCVState *env, target_ulong val_to_write, target_ulong csrno); target_ulong csr_read_helper(CPURISCVState *env, target_ulong csrno); -#ifndef CONFIG_USER_ONLY -void riscv_set_local_interrupt(RISCVCPU *cpu, target_ulong mask, int value); -#endif - #include "exec/cpu-all.h" #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 12b4757088..5439f4719e 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -6,242 +6,283 @@ (((target_ulong)(val) * ((mask) & ~((mask) << 1))) & \ (target_ulong)(mask))) -#define PGSHIFT 12 - -#define FSR_RD_SHIFT 5 -#define FSR_RD (0x7 << FSR_RD_SHIFT) - -#define FPEXC_NX 0x01 -#define FPEXC_UF 0x02 -#define FPEXC_OF 0x04 -#define FPEXC_DZ 0x08 -#define FPEXC_NV 0x10 - -#define FSR_AEXC_SHIFT 0 -#define FSR_NVA (FPEXC_NV << FSR_AEXC_SHIFT) -#define FSR_OFA (FPEXC_OF << FSR_AEXC_SHIFT) -#define FSR_UFA (FPEXC_UF << FSR_AEXC_SHIFT) -#define FSR_DZA (FPEXC_DZ << FSR_AEXC_SHIFT) -#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) -#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) - -/* CSR numbers */ -#define CSR_FFLAGS 0x1 -#define CSR_FRM 0x2 -#define CSR_FCSR 0x3 -#define CSR_CYCLE 0xc00 -#define CSR_TIME 0xc01 -#define CSR_INSTRET 0xc02 -#define CSR_HPMCOUNTER3 0xc03 -#define CSR_HPMCOUNTER4 0xc04 -#define CSR_HPMCOUNTER5 0xc05 -#define CSR_HPMCOUNTER6 0xc06 -#define CSR_HPMCOUNTER7 0xc07 -#define CSR_HPMCOUNTER8 0xc08 -#define CSR_HPMCOUNTER9 0xc09 -#define CSR_HPMCOUNTER10 0xc0a -#define CSR_HPMCOUNTER11 0xc0b -#define CSR_HPMCOUNTER12 0xc0c -#define CSR_HPMCOUNTER13 0xc0d -#define CSR_HPMCOUNTER14 0xc0e -#define CSR_HPMCOUNTER15 0xc0f -#define CSR_HPMCOUNTER16 0xc10 -#define CSR_HPMCOUNTER17 0xc11 -#define CSR_HPMCOUNTER18 0xc12 -#define CSR_HPMCOUNTER19 0xc13 -#define CSR_HPMCOUNTER20 0xc14 -#define CSR_HPMCOUNTER21 0xc15 -#define CSR_HPMCOUNTER22 0xc16 -#define CSR_HPMCOUNTER23 0xc17 -#define CSR_HPMCOUNTER24 0xc18 -#define CSR_HPMCOUNTER25 0xc19 -#define CSR_HPMCOUNTER26 0xc1a -#define CSR_HPMCOUNTER27 0xc1b -#define CSR_HPMCOUNTER28 0xc1c -#define CSR_HPMCOUNTER29 0xc1d -#define CSR_HPMCOUNTER30 0xc1e -#define CSR_HPMCOUNTER31 0xc1f -#define CSR_SSTATUS 0x100 -#define CSR_SIE 0x104 -#define CSR_STVEC 0x105 -#define CSR_SCOUNTEREN 0x106 -#define CSR_SSCRATCH 0x140 -#define CSR_SEPC 0x141 -#define CSR_SCAUSE 0x142 -#define CSR_SBADADDR 0x143 -#define CSR_SIP 0x144 -#define CSR_SPTBR 0x180 -#define CSR_SATP 0x180 -#define CSR_MSTATUS 0x300 -#define CSR_MISA 0x301 -#define CSR_MEDELEG 0x302 -#define CSR_MIDELEG 0x303 -#define CSR_MIE 0x304 -#define CSR_MTVEC 0x305 -#define CSR_MCOUNTEREN 0x306 -#define 
CSR_MSCRATCH 0x340 -#define CSR_MEPC 0x341 -#define CSR_MCAUSE 0x342 -#define CSR_MBADADDR 0x343 -#define CSR_MIP 0x344 -#define CSR_PMPCFG0 0x3a0 -#define CSR_PMPCFG1 0x3a1 -#define CSR_PMPCFG2 0x3a2 -#define CSR_PMPCFG3 0x3a3 -#define CSR_PMPADDR0 0x3b0 -#define CSR_PMPADDR1 0x3b1 -#define CSR_PMPADDR2 0x3b2 -#define CSR_PMPADDR3 0x3b3 -#define CSR_PMPADDR4 0x3b4 -#define CSR_PMPADDR5 0x3b5 -#define CSR_PMPADDR6 0x3b6 -#define CSR_PMPADDR7 0x3b7 -#define CSR_PMPADDR8 0x3b8 -#define CSR_PMPADDR9 0x3b9 -#define CSR_PMPADDR10 0x3ba -#define CSR_PMPADDR11 0x3bb -#define CSR_PMPADDR12 0x3bc -#define CSR_PMPADDR13 0x3bd -#define CSR_PMPADDR14 0x3be -#define CSR_PMPADDR15 0x3bf -#define CSR_TSELECT 0x7a0 -#define CSR_TDATA1 0x7a1 -#define CSR_TDATA2 0x7a2 -#define CSR_TDATA3 0x7a3 -#define CSR_DCSR 0x7b0 -#define CSR_DPC 0x7b1 -#define CSR_DSCRATCH 0x7b2 -#define CSR_MCYCLE 0xb00 -#define CSR_MINSTRET 0xb02 -#define CSR_MHPMCOUNTER3 0xb03 -#define CSR_MHPMCOUNTER4 0xb04 -#define CSR_MHPMCOUNTER5 0xb05 -#define CSR_MHPMCOUNTER6 0xb06 -#define CSR_MHPMCOUNTER7 0xb07 -#define CSR_MHPMCOUNTER8 0xb08 -#define CSR_MHPMCOUNTER9 0xb09 -#define CSR_MHPMCOUNTER10 0xb0a -#define CSR_MHPMCOUNTER11 0xb0b -#define CSR_MHPMCOUNTER12 0xb0c -#define CSR_MHPMCOUNTER13 0xb0d -#define CSR_MHPMCOUNTER14 0xb0e -#define CSR_MHPMCOUNTER15 0xb0f -#define CSR_MHPMCOUNTER16 0xb10 -#define CSR_MHPMCOUNTER17 0xb11 -#define CSR_MHPMCOUNTER18 0xb12 -#define CSR_MHPMCOUNTER19 0xb13 -#define CSR_MHPMCOUNTER20 0xb14 -#define CSR_MHPMCOUNTER21 0xb15 -#define CSR_MHPMCOUNTER22 0xb16 -#define CSR_MHPMCOUNTER23 0xb17 -#define CSR_MHPMCOUNTER24 0xb18 -#define CSR_MHPMCOUNTER25 0xb19 -#define CSR_MHPMCOUNTER26 0xb1a -#define CSR_MHPMCOUNTER27 0xb1b -#define CSR_MHPMCOUNTER28 0xb1c -#define CSR_MHPMCOUNTER29 0xb1d -#define CSR_MHPMCOUNTER30 0xb1e -#define CSR_MHPMCOUNTER31 0xb1f -#define CSR_MUCOUNTEREN 0x320 -#define CSR_MSCOUNTEREN 0x321 -#define CSR_MHPMEVENT3 0x323 -#define CSR_MHPMEVENT4 0x324 -#define CSR_MHPMEVENT5 0x325 -#define CSR_MHPMEVENT6 0x326 -#define CSR_MHPMEVENT7 0x327 -#define CSR_MHPMEVENT8 0x328 -#define CSR_MHPMEVENT9 0x329 -#define CSR_MHPMEVENT10 0x32a -#define CSR_MHPMEVENT11 0x32b -#define CSR_MHPMEVENT12 0x32c -#define CSR_MHPMEVENT13 0x32d -#define CSR_MHPMEVENT14 0x32e -#define CSR_MHPMEVENT15 0x32f -#define CSR_MHPMEVENT16 0x330 -#define CSR_MHPMEVENT17 0x331 -#define CSR_MHPMEVENT18 0x332 -#define CSR_MHPMEVENT19 0x333 -#define CSR_MHPMEVENT20 0x334 -#define CSR_MHPMEVENT21 0x335 -#define CSR_MHPMEVENT22 0x336 -#define CSR_MHPMEVENT23 0x337 -#define CSR_MHPMEVENT24 0x338 -#define CSR_MHPMEVENT25 0x339 -#define CSR_MHPMEVENT26 0x33a -#define CSR_MHPMEVENT27 0x33b -#define CSR_MHPMEVENT28 0x33c -#define CSR_MHPMEVENT29 0x33d -#define CSR_MHPMEVENT30 0x33e -#define CSR_MHPMEVENT31 0x33f -#define CSR_MVENDORID 0xf11 -#define CSR_MARCHID 0xf12 -#define CSR_MIMPID 0xf13 -#define CSR_MHARTID 0xf14 -#define CSR_CYCLEH 0xc80 -#define CSR_TIMEH 0xc81 -#define CSR_INSTRETH 0xc82 -#define CSR_HPMCOUNTER3H 0xc83 -#define CSR_HPMCOUNTER4H 0xc84 -#define CSR_HPMCOUNTER5H 0xc85 -#define CSR_HPMCOUNTER6H 0xc86 -#define CSR_HPMCOUNTER7H 0xc87 -#define CSR_HPMCOUNTER8H 0xc88 -#define CSR_HPMCOUNTER9H 0xc89 -#define CSR_HPMCOUNTER10H 0xc8a -#define CSR_HPMCOUNTER11H 0xc8b -#define CSR_HPMCOUNTER12H 0xc8c -#define CSR_HPMCOUNTER13H 0xc8d -#define CSR_HPMCOUNTER14H 0xc8e -#define CSR_HPMCOUNTER15H 0xc8f -#define CSR_HPMCOUNTER16H 0xc90 -#define CSR_HPMCOUNTER17H 0xc91 -#define CSR_HPMCOUNTER18H 0xc92 -#define 
CSR_HPMCOUNTER19H 0xc93 -#define CSR_HPMCOUNTER20H 0xc94 -#define CSR_HPMCOUNTER21H 0xc95 -#define CSR_HPMCOUNTER22H 0xc96 -#define CSR_HPMCOUNTER23H 0xc97 -#define CSR_HPMCOUNTER24H 0xc98 -#define CSR_HPMCOUNTER25H 0xc99 -#define CSR_HPMCOUNTER26H 0xc9a -#define CSR_HPMCOUNTER27H 0xc9b -#define CSR_HPMCOUNTER28H 0xc9c -#define CSR_HPMCOUNTER29H 0xc9d -#define CSR_HPMCOUNTER30H 0xc9e -#define CSR_HPMCOUNTER31H 0xc9f -#define CSR_MCYCLEH 0xb80 -#define CSR_MINSTRETH 0xb82 -#define CSR_MHPMCOUNTER3H 0xb83 -#define CSR_MHPMCOUNTER4H 0xb84 -#define CSR_MHPMCOUNTER5H 0xb85 -#define CSR_MHPMCOUNTER6H 0xb86 -#define CSR_MHPMCOUNTER7H 0xb87 -#define CSR_MHPMCOUNTER8H 0xb88 -#define CSR_MHPMCOUNTER9H 0xb89 -#define CSR_MHPMCOUNTER10H 0xb8a -#define CSR_MHPMCOUNTER11H 0xb8b -#define CSR_MHPMCOUNTER12H 0xb8c -#define CSR_MHPMCOUNTER13H 0xb8d -#define CSR_MHPMCOUNTER14H 0xb8e -#define CSR_MHPMCOUNTER15H 0xb8f -#define CSR_MHPMCOUNTER16H 0xb90 -#define CSR_MHPMCOUNTER17H 0xb91 -#define CSR_MHPMCOUNTER18H 0xb92 -#define CSR_MHPMCOUNTER19H 0xb93 -#define CSR_MHPMCOUNTER20H 0xb94 -#define CSR_MHPMCOUNTER21H 0xb95 -#define CSR_MHPMCOUNTER22H 0xb96 -#define CSR_MHPMCOUNTER23H 0xb97 -#define CSR_MHPMCOUNTER24H 0xb98 -#define CSR_MHPMCOUNTER25H 0xb99 -#define CSR_MHPMCOUNTER26H 0xb9a -#define CSR_MHPMCOUNTER27H 0xb9b -#define CSR_MHPMCOUNTER28H 0xb9c -#define CSR_MHPMCOUNTER29H 0xb9d -#define CSR_MHPMCOUNTER30H 0xb9e -#define CSR_MHPMCOUNTER31H 0xb9f - -/* mstatus bits */ +/* Floating point round mode */ +#define FSR_RD_SHIFT 5 +#define FSR_RD (0x7 << FSR_RD_SHIFT) + +/* Floating point accrued exception flags */ +#define FPEXC_NX 0x01 +#define FPEXC_UF 0x02 +#define FPEXC_OF 0x04 +#define FPEXC_DZ 0x08 +#define FPEXC_NV 0x10 + +/* Floating point status register bits */ +#define FSR_AEXC_SHIFT 0 +#define FSR_NVA (FPEXC_NV << FSR_AEXC_SHIFT) +#define FSR_OFA (FPEXC_OF << FSR_AEXC_SHIFT) +#define FSR_UFA (FPEXC_UF << FSR_AEXC_SHIFT) +#define FSR_DZA (FPEXC_DZ << FSR_AEXC_SHIFT) +#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) +#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) + +/* Control and Status Registers */ + +/* User Trap Setup */ +#define CSR_USTATUS 0x000 +#define CSR_UIE 0x004 +#define CSR_UTVEC 0x005 + +/* User Trap Handling */ +#define CSR_USCRATCH 0x040 +#define CSR_UEPC 0x041 +#define CSR_UCAUSE 0x042 +#define CSR_UTVAL 0x043 +#define CSR_UIP 0x044 + +/* User Floating-Point CSRs */ +#define CSR_FFLAGS 0x001 +#define CSR_FRM 0x002 +#define CSR_FCSR 0x003 + +/* User Timers and Counters */ +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_CYCLEH 
0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f + +/* Machine Timers and Counters */ +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 + +/* Machine Information Registers */ +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 + +/* Machine Trap Setup */ +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 + +/* Legacy Counter Setup (priv v1.9.1) */ +#define CSR_MUCOUNTEREN 0x320 +#define CSR_MSCOUNTEREN 0x321 + +/* Machine Trap Handling */ +#define CSR_MSCRATCH 0x340 +#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MBADADDR 0x343 +#define CSR_MIP 0x344 + +/* Supervisor Trap Setup */ +#define CSR_SSTATUS 0x100 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SCOUNTEREN 0x106 + +/* Supervisor Trap Handling */ +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_SBADADDR 0x143 +#define CSR_SIP 0x144 + +/* Supervisor Protection and Translation */ +#define CSR_SPTBR 0x180 +#define CSR_SATP 0x180 + +/* Physical Memory Protection */ +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf + +/* Debug/Trace Registers (shared with Debug Mode) */ +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 + +/* Debug Mode Registers */ +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH 0x7b2 + +/* Performance Counters */ +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 
0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +/* mstatus CSR bits */ #define MSTATUS_UIE 0x00000001 #define MSTATUS_SIE 0x00000002 #define MSTATUS_HIE 0x00000004 @@ -276,7 +317,7 @@ #define MSTATUS_SD MSTATUS64_SD #endif -/* sstatus bits */ +/* sstatus CSR bits */ #define SSTATUS_UIE 0x00000001 #define SSTATUS_SIE 0x00000002 #define SSTATUS_UPIE 0x00000010 @@ -297,83 +338,71 @@ #define SSTATUS_SD SSTATUS64_SD #endif -/* irqs */ -#define MIP_SSIP (1 << IRQ_S_SOFT) -#define MIP_HSIP (1 << IRQ_H_SOFT) -#define MIP_MSIP (1 << IRQ_M_SOFT) -#define MIP_STIP (1 << IRQ_S_TIMER) -#define MIP_HTIP (1 << IRQ_H_TIMER) -#define MIP_MTIP (1 << IRQ_M_TIMER) -#define MIP_SEIP (1 << IRQ_S_EXT) -#define MIP_HEIP (1 << IRQ_H_EXT) -#define MIP_MEIP (1 << IRQ_M_EXT) - -#define SIP_SSIP MIP_SSIP -#define SIP_STIP MIP_STIP -#define SIP_SEIP MIP_SEIP - +/* Privilege modes */ #define PRV_U 0 #define PRV_S 1 #define PRV_H 2 #define PRV_M 3 -/* privileged ISA 1.9.1 VM modes (mstatus.vm) */ -#define VM_1_09_MBARE 0 -#define VM_1_09_MBB 1 -#define VM_1_09_MBBID 2 -#define VM_1_09_SV32 8 -#define VM_1_09_SV39 9 -#define VM_1_09_SV48 10 - -/* privileged ISA 1.10.0 VM modes (satp.mode) */ -#define VM_1_10_MBARE 0 -#define VM_1_10_SV32 1 -#define VM_1_10_SV39 8 -#define 
VM_1_10_SV48 9 -#define VM_1_10_SV57 10 -#define VM_1_10_SV64 11 - -/* privileged ISA interrupt causes */ -#define IRQ_U_SOFT 0 /* since: priv-1.10 */ -#define IRQ_S_SOFT 1 -#define IRQ_H_SOFT 2 /* until: priv-1.9.1 */ -#define IRQ_M_SOFT 3 /* until: priv-1.9.1 */ -#define IRQ_U_TIMER 4 /* since: priv-1.10 */ -#define IRQ_S_TIMER 5 -#define IRQ_H_TIMER 6 /* until: priv-1.9.1 */ -#define IRQ_M_TIMER 7 /* until: priv-1.9.1 */ -#define IRQ_U_EXT 8 /* since: priv-1.10 */ -#define IRQ_S_EXT 9 -#define IRQ_H_EXT 10 /* until: priv-1.9.1 */ -#define IRQ_M_EXT 11 /* until: priv-1.9.1 */ -#define IRQ_X_COP 12 /* non-standard */ - -/* Default addresses */ -#define DEFAULT_RSTVEC 0x00001000 - -/* RV32 satp field masks */ -#define SATP32_MODE 0x80000000 -#define SATP32_ASID 0x7fc00000 -#define SATP32_PPN 0x003fffff - -/* RV64 satp field masks */ -#define SATP64_MODE 0xF000000000000000ULL -#define SATP64_ASID 0x0FFFF00000000000ULL -#define SATP64_PPN 0x00000FFFFFFFFFFFULL +/* RV32 satp CSR field masks */ +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7fc00000 +#define SATP32_PPN 0x003fffff + +/* RV64 satp CSR field masks */ +#define SATP64_MODE 0xF000000000000000ULL +#define SATP64_ASID 0x0FFFF00000000000ULL +#define SATP64_PPN 0x00000FFFFFFFFFFFULL #if defined(TARGET_RISCV32) -#define SATP_MODE SATP32_MODE -#define SATP_ASID SATP32_ASID -#define SATP_PPN SATP32_PPN +#define SATP_MODE SATP32_MODE +#define SATP_ASID SATP32_ASID +#define SATP_PPN SATP32_PPN #endif #if defined(TARGET_RISCV64) -#define SATP_MODE SATP64_MODE -#define SATP_ASID SATP64_ASID -#define SATP_PPN SATP64_PPN +#define SATP_MODE SATP64_MODE +#define SATP_ASID SATP64_ASID +#define SATP_PPN SATP64_PPN #endif -/* RISCV Exception Codes */ -#define EXCP_NONE -1 /* not a real RISCV exception code */ +/* VM modes (mstatus.vm) privileged ISA 1.9.1 */ +#define VM_1_09_MBARE 0 +#define VM_1_09_MBB 1 +#define VM_1_09_MBBID 2 +#define VM_1_09_SV32 8 +#define VM_1_09_SV39 9 +#define VM_1_09_SV48 10 + +/* VM modes (satp.mode) privileged ISA 1.10 */ +#define VM_1_10_MBARE 0 +#define VM_1_10_SV32 1 +#define VM_1_10_SV39 8 +#define VM_1_10_SV48 9 +#define VM_1_10_SV57 10 +#define VM_1_10_SV64 11 + +/* Page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ + +/* Page table PPN shift amount */ +#define PTE_PPN_SHIFT 10 + +/* Leaf page shift amount */ +#define PGSHIFT 12 + +/* Default Reset Vector adress */ +#define DEFAULT_RSTVEC 0x1000 + +/* Exception causes */ +#define EXCP_NONE -1 /* sentinel value */ #define RISCV_EXCP_INST_ADDR_MIS 0x0 #define RISCV_EXCP_INST_ACCESS_FAULT 0x1 #define RISCV_EXCP_ILLEGAL_INST 0x2 @@ -382,9 +411,7 @@ #define RISCV_EXCP_LOAD_ACCESS_FAULT 0x5 #define RISCV_EXCP_STORE_AMO_ADDR_MIS 0x6 #define RISCV_EXCP_STORE_AMO_ACCESS_FAULT 0x7 -#define RISCV_EXCP_U_ECALL 0x8 /* for convenience, report all - ECALLs as this, handler - fixes */ +#define RISCV_EXCP_U_ECALL 0x8 #define RISCV_EXCP_S_ECALL 0x9 #define RISCV_EXCP_H_ECALL 0xa #define RISCV_EXCP_M_ECALL 0xb @@ -395,15 +422,35 @@ #define RISCV_EXCP_INT_FLAG 0x80000000 #define RISCV_EXCP_INT_MASK 0x7fffffff -/* page table entry (PTE) fields */ -#define PTE_V 0x001 /* Valid */ -#define PTE_R 0x002 /* Read */ -#define PTE_W 0x004 /* Write */ -#define PTE_X 0x008 /* Execute */ 
-#define PTE_U 0x010 /* User */ -#define PTE_G 0x020 /* Global */ -#define PTE_A 0x040 /* Accessed */ -#define PTE_D 0x080 /* Dirty */ -#define PTE_SOFT 0x300 /* Reserved for Software */ - -#define PTE_PPN_SHIFT 10 +/* Interrupt causes */ +#define IRQ_U_SOFT 0 +#define IRQ_S_SOFT 1 +#define IRQ_H_SOFT 2 /* reserved */ +#define IRQ_M_SOFT 3 +#define IRQ_U_TIMER 4 +#define IRQ_S_TIMER 5 +#define IRQ_H_TIMER 6 /* reserved */ +#define IRQ_M_TIMER 7 +#define IRQ_U_EXT 8 +#define IRQ_S_EXT 9 +#define IRQ_H_EXT 10 /* reserved */ +#define IRQ_M_EXT 11 + +/* mip masks */ +#define MIP_USIP (1 << IRQ_U_SOFT) +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_HSIP (1 << IRQ_H_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_UTIP (1 << IRQ_U_TIMER) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_HTIP (1 << IRQ_H_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_UEIP (1 << IRQ_U_EXT) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_HEIP (1 << IRQ_H_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) + +/* sip masks */ +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP +#define SIP_SEIP MIP_SEIP diff --git a/target/riscv/helper.c b/target/riscv/cpu_helper.c index 63b3386b76..86f9f4730c 100644 --- a/target/riscv/helper.c +++ b/target/riscv/cpu_helper.c @@ -1,5 +1,5 @@ /* - * RISC-V emulation helpers for qemu. + * RISC-V CPU helpers for qemu. * * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu * Copyright (c) 2017-2018 SiFive, Inc. @@ -72,6 +72,39 @@ bool riscv_cpu_exec_interrupt(CPUState *cs, int interrupt_request) #if !defined(CONFIG_USER_ONLY) +/* iothread_mutex must be held */ +uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value) +{ + CPURISCVState *env = &cpu->env; + uint32_t old, new, cmp = atomic_read(&env->mip); + + do { + old = cmp; + new = (old & ~mask) | (value & mask); + cmp = atomic_cmpxchg(&env->mip, old, new); + } while (old != cmp); + + if (new && !old) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD); + } else if (!new && old) { + cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD); + } + + return old; +} + +void riscv_set_mode(CPURISCVState *env, target_ulong newpriv) +{ + if (newpriv > PRV_M) { + g_assert_not_reached(); + } + if (newpriv == PRV_H) { + newpriv = PRV_U; + } + /* tlb_flush is unnecessary as mode is contained in mmu_idx */ + env->priv = newpriv; +} + /* get_physical_address - get the physical address for this virtual address * * Do a page table walk to obtain the physical address corresponding to a diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index aec7558e1b..3726299d4a 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -90,7 +90,7 @@ void csr_write_helper(CPURISCVState *env, target_ulong val_to_write, target_ulong csrno) { #ifndef CONFIG_USER_ONLY - uint64_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP | (1 << IRQ_X_COP); + uint64_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP; uint64_t all_ints = delegable_ints | MIP_MSIP | MIP_MTIP; #endif @@ -171,10 +171,8 @@ void csr_write_helper(CPURISCVState *env, target_ulong val_to_write, */ qemu_mutex_lock_iothread(); RISCVCPU *cpu = riscv_env_get_cpu(env); - riscv_set_local_interrupt(cpu, MIP_SSIP, - (val_to_write & MIP_SSIP) != 0); - riscv_set_local_interrupt(cpu, MIP_STIP, - (val_to_write & MIP_STIP) != 0); + riscv_cpu_update_mip(cpu, MIP_SSIP | MIP_STIP, + (val_to_write & (MIP_SSIP | MIP_STIP))); /* * csrs, csrc on mip.SEIP is not decomposable into separate read and * write steps, so a different implementation is needed @@ -656,31 +654,6 
@@ target_ulong helper_csrrc(CPURISCVState *env, target_ulong src, #ifndef CONFIG_USER_ONLY -/* iothread_mutex must be held */ -void riscv_set_local_interrupt(RISCVCPU *cpu, target_ulong mask, int value) -{ - target_ulong old_mip = cpu->env.mip; - cpu->env.mip = (old_mip & ~mask) | (value ? mask : 0); - - if (cpu->env.mip && !old_mip) { - cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD); - } else if (!cpu->env.mip && old_mip) { - cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD); - } -} - -void riscv_set_mode(CPURISCVState *env, target_ulong newpriv) -{ - if (newpriv > PRV_M) { - g_assert_not_reached(); - } - if (newpriv == PRV_H) { - newpriv = PRV_U; - } - /* tlb_flush is unnecessary as mode is contained in mmu_idx */ - env->priv = newpriv; -} - target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) { if (!(env->priv >= PRV_S)) { @@ -731,7 +704,6 @@ target_ulong helper_mret(CPURISCVState *env, target_ulong cpu_pc_deb) return retpc; } - void helper_wfi(CPURISCVState *env) { CPUState *cs = CPU(riscv_env_get_cpu(env)); diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 8ed4823d6e..18ba7f85a5 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -145,6 +145,11 @@ static void s390_cpu_full_reset(CPUState *s) env->cregs[0] = CR0_RESET; env->cregs[14] = CR14_RESET; +#if defined(CONFIG_USER_ONLY) + /* user mode should always be allowed to use the full FPU */ + env->cregs[0] |= CR0_AFP; +#endif + /* architectured initial value for Breaking-Event-Address register */ env->gbea = 1; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 6f8861e554..8c2320e882 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -255,6 +255,7 @@ extern const struct VMStateDescription vmstate_s390_cpu; /* PSW defines */ #undef PSW_MASK_PER +#undef PSW_MASK_UNUSED_2 #undef PSW_MASK_DAT #undef PSW_MASK_IO #undef PSW_MASK_EXT @@ -273,6 +274,7 @@ extern const struct VMStateDescription vmstate_s390_cpu; #undef PSW_MASK_ESA_ADDR #define PSW_MASK_PER 0x4000000000000000ULL +#define PSW_MASK_UNUSED_2 0x2000000000000000ULL #define PSW_MASK_DAT 0x0400000000000000ULL #define PSW_MASK_IO 0x0200000000000000ULL #define PSW_MASK_EXT 0x0100000000000000ULL @@ -318,10 +320,14 @@ extern const struct VMStateDescription vmstate_s390_cpu; #define FLAG_MASK_PSW (FLAG_MASK_PER | FLAG_MASK_DAT | FLAG_MASK_PSTATE \ | FLAG_MASK_ASC | FLAG_MASK_64 | FLAG_MASK_32) +/* we'll use some unused PSW positions to store CR flags in tb flags */ +#define FLAG_MASK_AFP (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT) + /* Control register 0 bits */ #define CR0_LOWPROT 0x0000000010000000ULL #define CR0_SECONDARY 0x0000000004000000ULL #define CR0_EDAT 0x0000000000800000ULL +#define CR0_AFP 0x0000000000040000ULL #define CR0_EMERGENCY_SIGNAL_SC 0x0000000000004000ULL #define CR0_EXTERNAL_CALL_SC 0x0000000000002000ULL #define CR0_CKC_SC 0x0000000000000800ULL @@ -363,6 +369,9 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, *pc = env->psw.addr; *cs_base = env->ex_value; *flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW; + if (env->cregs[0] & CR0_AFP) { + *flags |= FLAG_MASK_AFP; + } } /* PER bits from control register 9 */ diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c index 172fb18df7..60cfeba48f 100644 --- a/target/s390x/cpu_features.c +++ b/target/s390x/cpu_features.c @@ -39,8 +39,10 @@ static const S390FeatDef s390_features[] = { FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"), FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE 
facility"), FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"), + FEAT_INIT("apqci", S390_FEAT_TYPE_STFL, 12, "Query AP Configuration Information facility"), FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"), FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"), + FEAT_INIT("apft", S390_FEAT_TYPE_STFL, 15, "AP Facilities Test facility"), FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"), FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"), FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"), @@ -129,6 +131,7 @@ static const S390FeatDef s390_features[] = { FEAT_INIT_MISC("dateh2", "DAT-enhancement facility 2"), FEAT_INIT_MISC("cmm", "Collaborative-memory-management facility"), + FEAT_INIT_MISC("ap", "AP instructions installed"), FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"), FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"), diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h index ac2c947f30..5fc7e7bf01 100644 --- a/target/s390x/cpu_features_def.h +++ b/target/s390x/cpu_features_def.h @@ -27,8 +27,10 @@ typedef enum { S390_FEAT_SENSE_RUNNING_STATUS, S390_FEAT_CONDITIONAL_SSKE, S390_FEAT_CONFIGURATION_TOPOLOGY, + S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_IPTE_RANGE, S390_FEAT_NONQ_KEY_SETTING, + S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_EXTENDED_TRANSLATION_2, S390_FEAT_MSA, S390_FEAT_LONG_DISPLACEMENT, @@ -119,6 +121,7 @@ typedef enum { /* Misc */ S390_FEAT_DAT_ENH_2, S390_FEAT_CMM, + S390_FEAT_AP, /* PLO */ S390_FEAT_PLO_CL, diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 265d25c937..7c253ff308 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -786,6 +786,8 @@ static void check_consistency(const S390CPUModel *model) { S390_FEAT_PRNO_TRNG_QRTCR, S390_FEAT_MSA_EXT_5 }, { S390_FEAT_PRNO_TRNG, S390_FEAT_MSA_EXT_5 }, { S390_FEAT_SIE_KSS, S390_FEAT_SIE_F2 }, + { S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_AP }, + { S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_AP }, }; int i; diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c index f0ce60cff2..2a33222f7e 100644 --- a/target/s390x/excp_helper.c +++ b/target/s390x/excp_helper.c @@ -21,33 +21,52 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internal.h" +#include "exec/helper-proto.h" #include "qemu/timer.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "hw/s390x/ioinst.h" #include "exec/address-spaces.h" +#include "tcg_s390x.h" #ifndef CONFIG_USER_ONLY #include "sysemu/sysemu.h" #include "hw/s390x/s390_flic.h" #endif -/* #define DEBUG_S390 */ -/* #define DEBUG_S390_STDOUT */ - -#ifdef DEBUG_S390 -#ifdef DEBUG_S390_STDOUT -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); \ - if (qemu_log_separate()) { qemu_log(fmt, ##__VA_ARGS__); } } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { qemu_log(fmt, ## __VA_ARGS__); } while (0) -#endif -#else -#define DPRINTF(fmt, ...) 
\ - do { } while (0) +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, + int ilen, uintptr_t ra) +{ + CPUState *cs = CPU(s390_env_get_cpu(env)); + + cpu_restore_state(cs, ra, true); + qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", + env->psw.addr); + trigger_pgm_exception(env, code, ilen); + cpu_loop_exit(cs); +} + +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra) +{ + g_assert(dxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Store the DXC into the lowcore */ + stl_phys(CPU(s390_env_get_cpu(env))->as, + env->psa + offsetof(LowCore, data_exc_code), dxc); #endif + /* Store the DXC into the FPC if AFP is enabled */ + if (env->cregs[0] & CR0_AFP) { + env->fpc = deposit32(env->fpc, 8, 8, dxc); + } + tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra); +} + +void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc) +{ + tcg_s390_data_exception(env, dxc, GETPC()); +} + #if defined(CONFIG_USER_ONLY) void s390_cpu_do_interrupt(CPUState *cs) @@ -92,8 +111,8 @@ int s390_cpu_handle_mmu_fault(CPUState *cs, vaddr orig_vaddr, int size, uint64_t asc; int prot; - DPRINTF("%s: address 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", - __func__, orig_vaddr, rw, mmu_idx); + qemu_log_mask(CPU_LOG_MMU, "%s: addr 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", + __func__, orig_vaddr, rw, mmu_idx); vaddr = orig_vaddr; @@ -122,8 +141,9 @@ int s390_cpu_handle_mmu_fault(CPUState *cs, vaddr orig_vaddr, int size, if (!address_space_access_valid(&address_space_memory, raddr, TARGET_PAGE_SIZE, rw, MEMTXATTRS_UNSPECIFIED)) { - DPRINTF("%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", __func__, - (uint64_t)raddr, (uint64_t)ram_size); + qemu_log_mask(CPU_LOG_MMU, + "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", + __func__, (uint64_t)raddr, (uint64_t)ram_size); trigger_pgm_exception(env, PGM_ADDRESSING, ILEN_AUTO); return 1; } @@ -181,8 +201,10 @@ static void do_program_interrupt(CPUS390XState *env) break; } - qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n", - __func__, env->int_pgm_code, ilen); + qemu_log_mask(CPU_LOG_INT, + "%s: code=0x%x ilen=%d psw: %" PRIx64 " %" PRIx64 "\n", + __func__, env->int_pgm_code, ilen, env->psw.mask, + env->psw.addr); lowcore = cpu_map_lowcore(env); @@ -204,10 +226,6 @@ static void do_program_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__, - env->int_pgm_code, ilen, env->psw.mask, - env->psw.addr); - load_psw(env, mask, addr); } @@ -298,9 +316,6 @@ static void do_ext_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, - env->psw.mask, env->psw.addr); - load_psw(env, mask, addr); } @@ -329,8 +344,6 @@ static void do_io_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); g_free(io); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, env->psw.mask, - env->psw.addr); load_psw(env, mask, addr); } @@ -372,9 +385,6 @@ static void do_mchk_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, - env->psw.mask, env->psw.addr); - load_psw(env, mask, addr); } @@ -385,8 +395,8 @@ void s390_cpu_do_interrupt(CPUState *cs) CPUS390XState *env = &cpu->env; bool stopped = false; - qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n", - __func__, cs->exception_index, env->psw.addr); + qemu_log_mask(CPU_LOG_INT, "%s: %d at psw=%" PRIx64 ":%" PRIx64 "\n", + __func__, cs->exception_index, env->psw.mask, env->psw.addr); 
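
[Aside on the data-exception path introduced above: tcg_s390_data_exception() records the DXC in the lowcore and, when CR0.AFP is enabled, also in byte 1 of the FPC via deposit32(env->fpc, 8, 8, dxc). A minimal standalone sketch of that field layout, assuming only the bit positions used in the hunk; the helper names set_dxc/get_dxc are invented for the illustration and are not part of the patch.]

    #include <assert.h>
    #include <stdint.h>

    /* Byte 1 of the FPC holds the DXC; same effect as
     * deposit32(fpc, 8, 8, dxc) / extract32(fpc, 8, 8) in QEMU. */
    static uint32_t set_dxc(uint32_t fpc, uint32_t dxc)
    {
        return (fpc & ~0xff00u) | ((dxc & 0xffu) << 8);
    }

    static uint32_t get_dxc(uint32_t fpc)
    {
        return (fpc >> 8) & 0xffu;
    }

    int main(void)
    {
        /* 0xff is the DXC that gen_trap() installs further down. */
        uint32_t fpc = set_dxc(0, 0xff);
        assert(get_dxc(fpc) == 0xff);
        return 0;
    }
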
try_deliver: /* handle machine checks */ diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c index 5c5b451b3b..1b662d2520 100644 --- a/target/s390x/fpu_helper.c +++ b/target/s390x/fpu_helper.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internal.h" +#include "tcg_s390x.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -40,14 +41,6 @@ ? (mask / (from / to)) & to \ : (mask & from) * (to / from)) -static void ieee_exception(CPUS390XState *env, uint32_t dxc, uintptr_t retaddr) -{ - /* Install the DXC code. */ - env->fpc = (env->fpc & ~0xff00) | (dxc << 8); - /* Trap. */ - s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, retaddr); -} - /* Should be called after any operation that may raise IEEE exceptions. */ static void handle_exceptions(CPUS390XState *env, uintptr_t retaddr) { @@ -75,7 +68,7 @@ static void handle_exceptions(CPUS390XState *env, uintptr_t retaddr) /* Send signals for enabled exceptions. */ s390_exc &= env->fpc >> 24; if (s390_exc) { - ieee_exception(env, s390_exc, retaddr); + tcg_s390_data_exception(env, s390_exc, retaddr); } } @@ -773,6 +766,6 @@ void HELPER(sfas)(CPUS390XState *env, uint64_t val) is also 1, a simulated-iee-exception trap occurs. */ s390_exc = (signalling >> 16) & (source >> 24); if (s390_exc) { - ieee_exception(env, s390_exc | 3, GETPC()); + tcg_s390_data_exception(env, s390_exc | 3, GETPC()); } } diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 384b61cd67..70015eaaf5 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -447,6 +447,9 @@ static uint16_t full_GEN12_GA1[] = { S390_FEAT_ADAPTER_INT_SUPPRESSION, S390_FEAT_EDAT_2, S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, + S390_FEAT_AP_QUERY_CONFIG_INFO, + S390_FEAT_AP_FACILITIES_TEST, + S390_FEAT_AP, }; static uint16_t full_GEN12_GA2[] = { diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 97c60ca7bc..018e9dd414 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -1,4 +1,5 @@ DEF_HELPER_2(exception, noreturn, env, i32) +DEF_HELPER_2(data_exception, noreturn, env, i32) DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 9c7b434fca..54e39df831 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -3,6 +3,8 @@ * * C(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC) * D(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA) + * E(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA, FLAGS) + * F(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, FLAGS) * * OPC = (op << 8) | op2 where op is the major, op2 the minor opcode * NAME = name of the opcode, used internally @@ -15,6 +17,7 @@ * OP = func op_xx does the bulk of the operation * CC = func cout_xx defines how cc should get set * DATA = immediate argument to op_xx function + * FLAGS = categorize the type of instruction (e.g. 
for advanced checks) * * The helpers get called in order: I1, I2, P, OP, W, CC */ @@ -29,11 +32,11 @@ C(0xb9e8, AGRK, RRF_a, DO, r2, r3, r1, 0, add, adds64) C(0xe308, AG, RXY_a, Z, r1, m2_64, r1, 0, add, adds64) C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64) - C(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32) - C(0xb31a, ADBR, RRE, Z, f1_o, f2_o, f1, 0, adb, f64) - C(0xb34a, AXBR, RRE, Z, 0, x2_o, x1, 0, axb, f128) - C(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32) - C(0xed1a, ADB, RXE, Z, f1_o, m2_64, f1, 0, adb, f64) + F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP) + F(0xb31a, ADBR, RRE, Z, f1_o, f2_o, f1, 0, adb, f64, IF_BFP) + F(0xb34a, AXBR, RRE, Z, 0, x2_o, x1, 0, axb, f128, IF_BFP) + F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP) + F(0xed1a, ADB, RXE, Z, f1_o, m2_64, f1, 0, adb, f64, IF_BFP) /* ADD HIGH */ C(0xb9c8, AHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, adds32) C(0xb9d8, AHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, adds32) @@ -151,7 +154,7 @@ C(0xb241, CKSM, RRE, Z, r1_o, ra2, new, r1_32, cksm, 0) /* COPY SIGN */ - C(0xb372, CPSDR, RRF_b, FPSSH, f3_o, f2_o, f1, 0, cps, 0) + F(0xb372, CPSDR, RRF_b, FPSSH, f3_o, f2_o, f1, 0, cps, 0, IF_AFP1 | IF_AFP2 | IF_AFP3) /* COMPARE */ C(0x1900, CR, RR_a, Z, r1_o, r2_o, 0, 0, 0, cmps32) @@ -161,17 +164,17 @@ C(0xb930, CGFR, RRE, Z, r1_o, r2_32s, 0, 0, 0, cmps64) C(0xe320, CG, RXY_a, Z, r1_o, m2_64, 0, 0, 0, cmps64) C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64) - C(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0) - C(0xb319, CDBR, RRE, Z, f1_o, f2_o, 0, 0, cdb, 0) - C(0xb349, CXBR, RRE, Z, x1_o, x2_o, 0, 0, cxb, 0) - C(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0) - C(0xed19, CDB, RXE, Z, f1_o, m2_64, 0, 0, cdb, 0) + F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP) + F(0xb319, CDBR, RRE, Z, f1_o, f2_o, 0, 0, cdb, 0, IF_BFP) + F(0xb349, CXBR, RRE, Z, x1_o, x2_o, 0, 0, cxb, 0, IF_BFP) + F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP) + F(0xed19, CDB, RXE, Z, f1_o, m2_64, 0, 0, cdb, 0, IF_BFP) /* COMPARE AND SIGNAL */ - C(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0) - C(0xb318, KDBR, RRE, Z, f1_o, f2_o, 0, 0, kdb, 0) - C(0xb348, KXBR, RRE, Z, x1_o, x2_o, 0, 0, kxb, 0) - C(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0) - C(0xed18, KDB, RXE, Z, f1_o, m2_64, 0, 0, kdb, 0) + F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP) + F(0xb318, KDBR, RRE, Z, f1_o, f2_o, 0, 0, kdb, 0, IF_BFP) + F(0xb348, KXBR, RRE, Z, x1_o, x2_o, 0, 0, kxb, 0, IF_BFP) + F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP) + F(0xed18, KDB, RXE, Z, f1_o, m2_64, 0, 0, kdb, 0, IF_BFP) /* COMPARE IMMEDIATE */ C(0xc20d, CFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps32) C(0xc20c, CGFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps64) @@ -288,33 +291,33 @@ C(0x4e00, CVD, RX_a, Z, r1_o, a2, 0, 0, cvd, 0) C(0xe326, CVDY, RXY_a, LD, r1_o, a2, 0, 0, cvd, 0) /* CONVERT TO FIXED */ - C(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0) - C(0xb399, CFDBR, RRF_e, Z, 0, f2_o, new, r1_32, cfdb, 0) - C(0xb39a, CFXBR, RRF_e, Z, 0, x2_o, new, r1_32, cfxb, 0) - C(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0) - C(0xb3a9, CGDBR, RRF_e, Z, 0, f2_o, r1, 0, cgdb, 0) - C(0xb3aa, CGXBR, RRF_e, Z, 0, x2_o, r1, 0, cgxb, 0) + F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP) + F(0xb399, CFDBR, RRF_e, Z, 0, f2_o, new, r1_32, cfdb, 0, IF_BFP) + F(0xb39a, CFXBR, RRF_e, Z, 0, x2_o, new, r1_32, cfxb, 0, IF_BFP) + F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP) + F(0xb3a9, CGDBR, RRF_e, Z, 0, f2_o, r1, 0, cgdb, 0, IF_BFP) + 
F(0xb3aa, CGXBR, RRF_e, Z, 0, x2_o, r1, 0, cgxb, 0, IF_BFP) /* CONVERT FROM FIXED */ - C(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0) - C(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, f1, 0, cdgb, 0) - C(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, x1, 0, cxgb, 0) - C(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0) - C(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, f1, 0, cdgb, 0) - C(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, x1, 0, cxgb, 0) + F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP) + F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, f1, 0, cdgb, 0, IF_BFP) + F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, x1, 0, cxgb, 0, IF_BFP) + F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP) + F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, f1, 0, cdgb, 0, IF_BFP) + F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, x1, 0, cxgb, 0, IF_BFP) /* CONVERT TO LOGICAL */ - C(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0) - C(0xb39d, CLFDBR, RRF_e, FPE, 0, f2_o, new, r1_32, clfdb, 0) - C(0xb39e, CLFXBR, RRF_e, FPE, 0, x2_o, new, r1_32, clfxb, 0) - C(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0) - C(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2_o, r1, 0, clgdb, 0) - C(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2_o, r1, 0, clgxb, 0) + F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP) + F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2_o, new, r1_32, clfdb, 0, IF_BFP) + F(0xb39e, CLFXBR, RRF_e, FPE, 0, x2_o, new, r1_32, clfxb, 0, IF_BFP) + F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP) + F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2_o, r1, 0, clgdb, 0, IF_BFP) + F(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2_o, r1, 0, clgxb, 0, IF_BFP) /* CONVERT FROM LOGICAL */ - C(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0) - C(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, f1, 0, cdlgb, 0) - C(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, x1, 0, cxlgb, 0) - C(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0) - C(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0) - C(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0) + F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP) + F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, f1, 0, cdlgb, 0, IF_BFP) + F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, x1, 0, cxlgb, 0, IF_BFP) + F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP) + F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0, IF_BFP) + F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0, IF_BFP) /* CONVERT UTF-8 TO UTF-16 */ D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12) @@ -332,11 +335,11 @@ /* DIVIDE */ C(0x1d00, DR, RR_a, Z, r1_D32, r2_32s, new_P, r1_P32, divs32, 0) C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0) - C(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0) - C(0xb31d, DDBR, RRE, Z, f1_o, f2_o, f1, 0, ddb, 0) - C(0xb34d, DXBR, RRE, Z, 0, x2_o, x1, 0, dxb, 0) - C(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0) - C(0xed1d, DDB, RXE, Z, f1_o, m2_64, f1, 0, ddb, 0) + F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP) + F(0xb31d, DDBR, RRE, Z, f1_o, f2_o, f1, 0, ddb, 0, IF_BFP) + F(0xb34d, DXBR, RRE, Z, 0, x2_o, x1, 0, dxb, 0, IF_BFP) + F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP) + F(0xed1d, DDB, RXE, Z, f1_o, m2_64, f1, 0, ddb, 0, IF_BFP) /* DIVIDE LOGICAL */ C(0xb997, DLR, RRE, Z, r1_D32, r2_32u, new_P, r1_P32, divu32, 0) C(0xe397, DL, RXY_a, Z, r1_D32, m2_32u, new_P, r1_P32, divu32, 0) @@ -375,7 +378,7 @@ /* EXTRACT CPU TIME */ C(0xc801, ECTG, SSF, ECT, 0, 0, 0, 0, ectg, 0) /* EXTRACT FPC */ - C(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 0) + F(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 
0, IF_BFP) /* EXTRACT PSW */ C(0xb98d, EPSW, RRE, Z, 0, 0, 0, 0, epsw, 0) @@ -407,13 +410,13 @@ C(0xb914, LGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, 0) C(0xe304, LG, RXY_a, Z, 0, a2, r1, 0, ld64, 0) C(0xe314, LGF, RXY_a, Z, 0, a2, r1, 0, ld32s, 0) - C(0x2800, LDR, RR_a, Z, 0, f2_o, 0, f1, mov2, 0) - C(0x6800, LD, RX_a, Z, 0, m2_64, 0, f1, mov2, 0) - C(0xed65, LDY, RXY_a, LD, 0, m2_64, 0, f1, mov2, 0) - C(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0) - C(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0) - C(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0) - C(0xb365, LXR, RRE, Z, 0, x2_o, 0, x1, movx, 0) + F(0x2800, LDR, RR_a, Z, 0, f2_o, 0, f1, mov2, 0, IF_AFP1 | IF_AFP2) + F(0x6800, LD, RX_a, Z, 0, m2_64, 0, f1, mov2, 0, IF_AFP1) + F(0xed65, LDY, RXY_a, LD, 0, m2_64, 0, f1, mov2, 0, IF_AFP1) + F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2) + F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1) + F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1) + F(0xb365, LXR, RRE, Z, 0, x2_o, 0, x1, movx, 0, IF_AFP1) /* LOAD IMMEDIATE */ C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0) /* LOAD RELATIVE LONG */ @@ -450,9 +453,9 @@ C(0xe312, LT, RXY_a, EI, 0, a2, new, r1_32, ld32s, s64) C(0xe302, LTG, RXY_a, EI, 0, a2, r1, 0, ld64, s64) C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64) - C(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32) - C(0xb312, LTDBR, RRE, Z, 0, f2_o, 0, f1, mov2, f64) - C(0xb342, LTXBR, RRE, Z, 0, x2_o, 0, x1, movx, f128) + F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP) + F(0xb312, LTDBR, RRE, Z, 0, f2_o, 0, f1, mov2, f64, IF_BFP) + F(0xb342, LTXBR, RRE, Z, 0, x2_o, 0, x1, movx, f128, IF_BFP) /* LOAD AND TRAP */ C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0) C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0) @@ -472,10 +475,10 @@ C(0x1300, LCR, RR_a, Z, 0, r2, new, r1_32, neg, neg32) C(0xb903, LCGR, RRE, Z, 0, r2, r1, 0, neg, neg64) C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64) - C(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32) - C(0xb313, LCDBR, RRE, Z, 0, f2_o, f1, 0, negf64, f64) - C(0xb343, LCXBR, RRE, Z, 0, x2_o, x1, 0, negf128, f128) - C(0xb373, LCDFR, RRE, FPSSH, 0, f2_o, f1, 0, negf64, 0) + F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP) + F(0xb313, LCDBR, RRE, Z, 0, f2_o, f1, 0, negf64, f64, IF_BFP) + F(0xb343, LCXBR, RRE, Z, 0, x2_o, x1, 0, negf128, f128, IF_BFP) + F(0xb373, LCDFR, RRE, FPSSH, 0, f2_o, f1, 0, negf64, 0, IF_AFP1 | IF_AFP2) /* LOAD HALFWORD */ C(0xb927, LHR, RRE, EI, 0, r2_16s, 0, r1_32, mov2, 0) C(0xb907, LGHR, RRE, EI, 0, r2_16s, 0, r1, mov2, 0) @@ -532,17 +535,17 @@ C(0xe39c, LLGTAT, RXY_a, LAT, 0, m2_32u, r1, 0, llgtat, 0) /* LOAD FPR FROM GR */ - C(0xb3c1, LDGR, RRE, FPRGR, 0, r2_o, 0, f1, mov2, 0) + F(0xb3c1, LDGR, RRE, FPRGR, 0, r2_o, 0, f1, mov2, 0, IF_AFP1) /* LOAD GR FROM FPR */ - C(0xb3cd, LGDR, RRE, FPRGR, 0, f2_o, 0, r1, mov2, 0) + F(0xb3cd, LGDR, RRE, FPRGR, 0, f2_o, 0, r1, mov2, 0, IF_AFP2) /* LOAD NEGATIVE */ C(0x1100, LNR, RR_a, Z, 0, r2_32s, new, r1_32, nabs, nabs32) C(0xb901, LNGR, RRE, Z, 0, r2, r1, 0, nabs, nabs64) C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64) - C(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32) - C(0xb311, LNDBR, RRE, Z, 0, f2_o, f1, 0, nabsf64, f64) - C(0xb341, LNXBR, RRE, Z, 0, x2_o, x1, 0, nabsf128, f128) - C(0xb371, LNDFR, RRE, FPSSH, 0, f2_o, f1, 0, nabsf64, 0) + F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP) + F(0xb311, LNDBR, RRE, Z, 0, f2_o, f1, 0, nabsf64, f64, IF_BFP) + 
F(0xb341, LNXBR, RRE, Z, 0, x2_o, x1, 0, nabsf128, f128, IF_BFP) + F(0xb371, LNDFR, RRE, FPSSH, 0, f2_o, f1, 0, nabsf64, 0, IF_AFP1 | IF_AFP2) /* LOAD ON CONDITION */ C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0) C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0) @@ -564,10 +567,10 @@ C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32) C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64) C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64) - C(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32) - C(0xb310, LPDBR, RRE, Z, 0, f2_o, f1, 0, absf64, f64) - C(0xb340, LPXBR, RRE, Z, 0, x2_o, x1, 0, absf128, f128) - C(0xb370, LPDFR, RRE, FPSSH, 0, f2_o, f1, 0, absf64, 0) + F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP) + F(0xb310, LPDBR, RRE, Z, 0, f2_o, f1, 0, absf64, f64, IF_BFP) + F(0xb340, LPXBR, RRE, Z, 0, x2_o, x1, 0, absf128, f128, IF_BFP) + F(0xb370, LPDFR, RRE, FPSSH, 0, f2_o, f1, 0, absf64, 0, IF_AFP1 | IF_AFP2) /* LOAD REVERSED */ C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0) C(0xb90f, LRVGR, RRE, Z, 0, r2_o, r1, 0, rev64, 0) @@ -575,30 +578,30 @@ C(0xe31e, LRV, RXY_a, Z, 0, m2_32u, new, r1_32, rev32, 0) C(0xe30f, LRVG, RXY_a, Z, 0, m2_64, r1, 0, rev64, 0) /* LOAD ZERO */ - C(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0) - C(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0) - C(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0) + F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1) + F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1) + F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1) /* LOAD FPC */ - C(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0) + F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP) /* LOAD FPC AND SIGNAL */ - C(0xb2bd, LFAS, S, IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0) + F(0xb2bd, LFAS, S, IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0, IF_DFP) /* LOAD FP INTEGER */ - C(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0) - C(0xb35f, FIDBR, RRF_e, Z, 0, f2_o, f1, 0, fidb, 0) - C(0xb347, FIXBR, RRF_e, Z, 0, x2_o, x1, 0, fixb, 0) + F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP) + F(0xb35f, FIDBR, RRF_e, Z, 0, f2_o, f1, 0, fidb, 0, IF_BFP) + F(0xb347, FIXBR, RRF_e, Z, 0, x2_o, x1, 0, fixb, 0, IF_BFP) /* LOAD LENGTHENED */ - C(0xb304, LDEBR, RRE, Z, 0, e2, f1, 0, ldeb, 0) - C(0xb305, LXDBR, RRE, Z, 0, f2_o, x1, 0, lxdb, 0) - C(0xb306, LXEBR, RRE, Z, 0, e2, x1, 0, lxeb, 0) - C(0xed04, LDEB, RXE, Z, 0, m2_32u, f1, 0, ldeb, 0) - C(0xed05, LXDB, RXE, Z, 0, m2_64, x1, 0, lxdb, 0) - C(0xed06, LXEB, RXE, Z, 0, m2_32u, x1, 0, lxeb, 0) + F(0xb304, LDEBR, RRE, Z, 0, e2, f1, 0, ldeb, 0, IF_BFP) + F(0xb305, LXDBR, RRE, Z, 0, f2_o, x1, 0, lxdb, 0, IF_BFP) + F(0xb306, LXEBR, RRE, Z, 0, e2, x1, 0, lxeb, 0, IF_BFP) + F(0xed04, LDEB, RXE, Z, 0, m2_32u, f1, 0, ldeb, 0, IF_BFP) + F(0xed05, LXDB, RXE, Z, 0, m2_64, x1, 0, lxdb, 0, IF_BFP) + F(0xed06, LXEB, RXE, Z, 0, m2_32u, x1, 0, lxeb, 0, IF_BFP) /* LOAD ROUNDED */ - C(0xb344, LEDBR, RRE, Z, 0, f2_o, new, e1, ledb, 0) - C(0xb345, LDXBR, RRE, Z, 0, x2_o, f1, 0, ldxb, 0) - C(0xb346, LEXBR, RRE, Z, 0, x2_o, new, e1, lexb, 0) + F(0xb344, LEDBR, RRE, Z, 0, f2_o, new, e1, ledb, 0, IF_BFP) + F(0xb345, LDXBR, RRE, Z, 0, x2_o, f1, 0, ldxb, 0, IF_BFP) + F(0xb346, LEXBR, RRE, Z, 0, x2_o, new, e1, lexb, 0, IF_BFP) /* LOAD MULTIPLE */ C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0) @@ -644,15 +647,15 @@ C(0x1c00, MR, RR_a, Z, r1p1_32s, r2_32s, new, r1_D32, mul, 0) C(0x5c00, M, RX_a, Z, r1p1_32s, m2_32s, new, r1_D32, mul, 0) C(0xe35c, MFY, RXY_a, GIE, r1p1_32s, m2_32s, new, r1_D32, mul, 0) - C(0xb317, MEEBR, RRE, Z, 
e1, e2, new, e1, meeb, 0) - C(0xb31c, MDBR, RRE, Z, f1_o, f2_o, f1, 0, mdb, 0) - C(0xb34c, MXBR, RRE, Z, 0, x2_o, x1, 0, mxb, 0) - C(0xb30c, MDEBR, RRE, Z, f1_o, e2, f1, 0, mdeb, 0) - C(0xb307, MXDBR, RRE, Z, 0, f2_o, x1, 0, mxdb, 0) - C(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0) - C(0xed1c, MDB, RXE, Z, f1_o, m2_64, f1, 0, mdb, 0) - C(0xed0c, MDEB, RXE, Z, f1_o, m2_32u, f1, 0, mdeb, 0) - C(0xed07, MXDB, RXE, Z, 0, m2_64, x1, 0, mxdb, 0) + F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP) + F(0xb31c, MDBR, RRE, Z, f1_o, f2_o, f1, 0, mdb, 0, IF_BFP) + F(0xb34c, MXBR, RRE, Z, 0, x2_o, x1, 0, mxb, 0, IF_BFP) + F(0xb30c, MDEBR, RRE, Z, f1_o, e2, f1, 0, mdeb, 0, IF_BFP) + F(0xb307, MXDBR, RRE, Z, 0, f2_o, x1, 0, mxdb, 0, IF_BFP) + F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP) + F(0xed1c, MDB, RXE, Z, f1_o, m2_64, f1, 0, mdb, 0, IF_BFP) + F(0xed0c, MDEB, RXE, Z, f1_o, m2_32u, f1, 0, mdeb, 0, IF_BFP) + F(0xed07, MXDB, RXE, Z, 0, m2_64, x1, 0, mxdb, 0, IF_BFP) /* MULTIPLY HALFWORD */ C(0x4c00, MH, RX_a, Z, r1_o, m2_16s, new, r1_32, mul, 0) C(0xe37c, MHY, RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0) @@ -677,15 +680,15 @@ C(0xc200, MSGFI, RIL_a, GIE, r1_o, i2, r1, 0, mul, 0) /* MULTIPLY AND ADD */ - C(0xb30e, MAEBR, RRD, Z, e1, e2, new, e1, maeb, 0) - C(0xb31e, MADBR, RRD, Z, f1_o, f2_o, f1, 0, madb, 0) - C(0xed0e, MAEB, RXF, Z, e1, m2_32u, new, e1, maeb, 0) - C(0xed1e, MADB, RXF, Z, f1_o, m2_64, f1, 0, madb, 0) + F(0xb30e, MAEBR, RRD, Z, e1, e2, new, e1, maeb, 0, IF_BFP) + F(0xb31e, MADBR, RRD, Z, f1_o, f2_o, f1, 0, madb, 0, IF_BFP) + F(0xed0e, MAEB, RXF, Z, e1, m2_32u, new, e1, maeb, 0, IF_BFP) + F(0xed1e, MADB, RXF, Z, f1_o, m2_64, f1, 0, madb, 0, IF_BFP) /* MULTIPLY AND SUBTRACT */ - C(0xb30f, MSEBR, RRD, Z, e1, e2, new, e1, mseb, 0) - C(0xb31f, MSDBR, RRD, Z, f1_o, f2_o, f1, 0, msdb, 0) - C(0xed0f, MSEB, RXF, Z, e1, m2_32u, new, e1, mseb, 0) - C(0xed1f, MSDB, RXF, Z, f1_o, m2_64, f1, 0, msdb, 0) + F(0xb30f, MSEBR, RRD, Z, e1, e2, new, e1, mseb, 0, IF_BFP) + F(0xb31f, MSDBR, RRD, Z, f1_o, f2_o, f1, 0, msdb, 0, IF_BFP) + F(0xed0f, MSEB, RXF, Z, e1, m2_32u, new, e1, mseb, 0, IF_BFP) + F(0xed1f, MSDB, RXF, Z, f1_o, m2_64, f1, 0, msdb, 0, IF_BFP) /* OR */ C(0x1600, OR, RR_a, Z, r1, r2, new, r1_32, or, nz32) @@ -752,14 +755,14 @@ D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1) D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3) /* SET FPC */ - C(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0) + F(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0, IF_BFP) /* SET FPC AND SIGNAL */ - C(0xb385, SFASR, RRE, IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0) + F(0xb385, SFASR, RRE, IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0, IF_DFP) /* SET BFP ROUNDING MODE */ - C(0xb299, SRNM, S, Z, 0, 0, 0, 0, srnm, 0) - C(0xb2b8, SRNMB, S, FPE, 0, 0, 0, 0, srnm, 0) + F(0xb299, SRNM, S, Z, 0, 0, 0, 0, srnm, 0, IF_BFP) + F(0xb2b8, SRNMB, S, FPE, 0, 0, 0, 0, srnm, 0, IF_BFP) /* SET DFP ROUNDING MODE */ - C(0xb2b9, SRNMT, S, DFPR, 0, 0, 0, 0, srnm, 0) + F(0xb2b9, SRNMT, S, DFPR, 0, 0, 0, 0, srnm, 0, IF_DFP) /* SET PROGRAM MASK */ C(0x0400, SPM, RR_a, Z, r1, 0, 0, 0, spm, 0) @@ -789,20 +792,20 @@ C(0x8c00, SRDL, RS_a, Z, r1_D32, sh64, new, r1_D32, srl, 0) /* SQUARE ROOT */ - C(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0) - C(0xb315, SQDBR, RRE, Z, 0, f2_o, f1, 0, sqdb, 0) - C(0xb316, SQXBR, RRE, Z, 0, x2_o, x1, 0, sqxb, 0) - C(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0) - C(0xed15, SQDB, RXE, Z, 0, m2_64, f1, 0, sqdb, 0) + F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) + F(0xb315, SQDBR, RRE, Z, 0, f2_o, f1, 
0, sqdb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, 0, x2_o, x1, 0, sqxb, 0, IF_BFP) + F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) + F(0xed15, SQDB, RXE, Z, 0, m2_64, f1, 0, sqdb, 0, IF_BFP) /* STORE */ C(0x5000, ST, RX_a, Z, r1_o, a2, 0, 0, st32, 0) C(0xe350, STY, RXY_a, LD, r1_o, a2, 0, 0, st32, 0) C(0xe324, STG, RXY_a, Z, r1_o, a2, 0, 0, st64, 0) - C(0x6000, STD, RX_a, Z, f1_o, a2, 0, 0, st64, 0) - C(0xed67, STDY, RXY_a, LD, f1_o, a2, 0, 0, st64, 0) - C(0x7000, STE, RX_a, Z, e1, a2, 0, 0, st32, 0) - C(0xed66, STEY, RXY_a, LD, e1, a2, 0, 0, st32, 0) + F(0x6000, STD, RX_a, Z, f1_o, a2, 0, 0, st64, 0, IF_AFP1) + F(0xed67, STDY, RXY_a, LD, f1_o, a2, 0, 0, st64, 0, IF_AFP1) + F(0x7000, STE, RX_a, Z, e1, a2, 0, 0, st32, 0, IF_AFP1) + F(0xed66, STEY, RXY_a, LD, e1, a2, 0, 0, st32, 0, IF_AFP1) /* STORE RELATIVE LONG */ C(0xc40f, STRL, RIL_b, GIE, r1_o, ri2, 0, 0, st32, 0) C(0xc40b, STGRL, RIL_b, GIE, r1_o, ri2, 0, 0, st64, 0) @@ -837,7 +840,7 @@ /* STORE FACILITY LIST EXTENDED */ C(0xb2b0, STFLE, S, SFLE, 0, a2, 0, 0, stfle, 0) /* STORE FPC */ - C(0xb29c, STFPC, S, Z, 0, a2, new, m2_32, efpc, 0) + F(0xb29c, STFPC, S, Z, 0, a2, new, m2_32, efpc, 0, IF_BFP) /* STORE MULTIPLE */ D(0x9000, STM, RS_a, Z, 0, a2, 0, 0, stm, 0, 4) @@ -861,11 +864,11 @@ C(0xb9e9, SGRK, RRF_a, DO, r2, r3, r1, 0, sub, subs64) C(0xe309, SG, RXY_a, Z, r1, m2_64, r1, 0, sub, subs64) C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64) - C(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32) - C(0xb31b, SDBR, RRE, Z, f1_o, f2_o, f1, 0, sdb, f64) - C(0xb34b, SXBR, RRE, Z, 0, x2_o, x1, 0, sxb, f128) - C(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32) - C(0xed1b, SDB, RXE, Z, f1_o, m2_64, f1, 0, sdb, f64) + F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP) + F(0xb31b, SDBR, RRE, Z, f1_o, f2_o, f1, 0, sdb, f64, IF_BFP) + F(0xb34b, SXBR, RRE, Z, 0, x2_o, x1, 0, sxb, f128, IF_BFP) + F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP) + F(0xed1b, SDB, RXE, Z, f1_o, m2_64, f1, 0, sdb, f64, IF_BFP) /* SUBTRACT HALFWORD */ C(0x4b00, SH, RX_a, Z, r1, m2_16s, new, r1_32, sub, subs32) C(0xe37b, SHY, RXY_a, LD, r1, m2_16s, new, r1_32, sub, subs32) @@ -904,9 +907,9 @@ C(0x9300, TS, S, Z, 0, a2, 0, 0, ts, 0) /* TEST DATA CLASS */ - C(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0) - C(0xed11, TCDB, RXE, Z, f1_o, a2, 0, 0, tcdb, 0) - C(0xed12, TCXB, RXE, Z, x1_o, a2, 0, 0, tcxb, 0) + F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP) + F(0xed11, TCDB, RXE, Z, f1_o, a2, 0, 0, tcdb, 0, IF_BFP) + F(0xed12, TCXB, RXE, Z, x1_o, a2, 0, 0, tcxb, 0, IF_BFP) /* TEST DECIMAL */ C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0) @@ -961,126 +964,126 @@ #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ - D(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL) - D(0xb98a, CSPG, RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ) + E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV) + E(0xb98a, CSPG, RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ, IF_PRIV) /* DIAGNOSE (KVM hypercall) */ - C(0x8300, DIAG, RSI, Z, 0, 0, 0, 0, diag, 0) + F(0x8300, DIAG, RSI, Z, 0, 0, 0, 0, diag, 0, IF_PRIV) /* INSERT STORAGE KEY EXTENDED */ - C(0xb229, ISKE, RRE, Z, 0, r2_o, new, r1_8, iske, 0) + F(0xb229, ISKE, RRE, Z, 0, r2_o, new, r1_8, iske, 0, IF_PRIV) /* INVALIDATE DAT TABLE ENTRY */ - C(0xb98e, IPDE, RRF_b, Z, r1_o, r2_o, 0, 0, idte, 0) + F(0xb98e, IPDE, RRF_b, Z, r1_o, r2_o, 0, 0, idte, 0, IF_PRIV) /* INVALIDATE PAGE TABLE ENTRY */ - C(0xb221, IPTE, RRF_a, Z, r1_o, r2_o, 0, 0, ipte, 0) + F(0xb221, IPTE, 
RRF_a, Z, r1_o, r2_o, 0, 0, ipte, 0, IF_PRIV) /* LOAD CONTROL */ - C(0xb700, LCTL, RS_a, Z, 0, a2, 0, 0, lctl, 0) - C(0xeb2f, LCTLG, RSY_a, Z, 0, a2, 0, 0, lctlg, 0) + F(0xb700, LCTL, RS_a, Z, 0, a2, 0, 0, lctl, 0, IF_PRIV) + F(0xeb2f, LCTLG, RSY_a, Z, 0, a2, 0, 0, lctlg, 0, IF_PRIV) /* LOAD PROGRAM PARAMETER */ - C(0xb280, LPP, S, LPP, 0, m2_64, 0, 0, lpp, 0) + F(0xb280, LPP, S, LPP, 0, m2_64, 0, 0, lpp, 0, IF_PRIV) /* LOAD PSW */ - C(0x8200, LPSW, S, Z, 0, a2, 0, 0, lpsw, 0) + F(0x8200, LPSW, S, Z, 0, a2, 0, 0, lpsw, 0, IF_PRIV) /* LOAD PSW EXTENDED */ - C(0xb2b2, LPSWE, S, Z, 0, a2, 0, 0, lpswe, 0) + F(0xb2b2, LPSWE, S, Z, 0, a2, 0, 0, lpswe, 0, IF_PRIV) /* LOAD REAL ADDRESS */ - C(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0) - C(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0) - C(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0) + F(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) /* LOAD USING REAL ADDRESS */ - C(0xb24b, LURA, RRE, Z, 0, r2, new, r1_32, lura, 0) - C(0xb905, LURAG, RRE, Z, 0, r2, r1, 0, lurag, 0) + F(0xb24b, LURA, RRE, Z, 0, r2, new, r1_32, lura, 0, IF_PRIV) + F(0xb905, LURAG, RRE, Z, 0, r2, r1, 0, lurag, 0, IF_PRIV) /* MOVE TO PRIMARY */ - C(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0) + F(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0, IF_PRIV) /* MOVE TO SECONDARY */ - C(0xdb00, MVCS, SS_d, Z, la1, a2, 0, 0, mvcs, 0) + F(0xdb00, MVCS, SS_d, Z, la1, a2, 0, 0, mvcs, 0, IF_PRIV) /* PURGE TLB */ - C(0xb20d, PTLB, S, Z, 0, 0, 0, 0, ptlb, 0) + F(0xb20d, PTLB, S, Z, 0, 0, 0, 0, ptlb, 0, IF_PRIV) /* RESET REFERENCE BIT EXTENDED */ - C(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0) + F(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0, IF_PRIV) /* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */ - C(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0) + F(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0, IF_PRIV) /* SET ADDRESS SPACE CONTROL FAST */ - C(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0) + F(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0, IF_PRIV) /* SET CLOCK */ - C(0xb204, SCK, S, Z, la2, 0, 0, 0, sck, 0) + F(0xb204, SCK, S, Z, la2, 0, 0, 0, sck, 0, IF_PRIV) /* SET CLOCK COMPARATOR */ - C(0xb206, SCKC, S, Z, 0, m2_64a, 0, 0, sckc, 0) + F(0xb206, SCKC, S, Z, 0, m2_64a, 0, 0, sckc, 0, IF_PRIV) /* SET CLOCK PROGRAMMABLE FIELD */ - C(0x0107, SCKPF, E, Z, 0, 0, 0, 0, sckpf, 0) + F(0x0107, SCKPF, E, Z, 0, 0, 0, 0, sckpf, 0, IF_PRIV) /* SET CPU TIMER */ - C(0xb208, SPT, S, Z, 0, m2_64a, 0, 0, spt, 0) + F(0xb208, SPT, S, Z, 0, m2_64a, 0, 0, spt, 0, IF_PRIV) /* SET PREFIX */ - C(0xb210, SPX, S, Z, 0, m2_32ua, 0, 0, spx, 0) + F(0xb210, SPX, S, Z, 0, m2_32ua, 0, 0, spx, 0, IF_PRIV) /* SET PSW KEY FROM ADDRESS */ - C(0xb20a, SPKA, S, Z, 0, a2, 0, 0, spka, 0) + F(0xb20a, SPKA, S, Z, 0, a2, 0, 0, spka, 0, IF_PRIV) /* SET STORAGE KEY EXTENDED */ - C(0xb22b, SSKE, RRF_c, Z, r1_o, r2_o, 0, 0, sske, 0) + F(0xb22b, SSKE, RRF_c, Z, r1_o, r2_o, 0, 0, sske, 0, IF_PRIV) /* SET SYSTEM MASK */ - C(0x8000, SSM, S, Z, 0, m2_8u, 0, 0, ssm, 0) + F(0x8000, SSM, S, Z, 0, m2_8u, 0, 0, ssm, 0, IF_PRIV) /* SIGNAL PROCESSOR */ - C(0xae00, SIGP, RS_a, Z, 0, a2, 0, 0, sigp, 0) + F(0xae00, SIGP, RS_a, Z, 0, a2, 0, 0, sigp, 0, IF_PRIV) /* STORE CLOCK */ C(0xb205, STCK, S, Z, la2, 0, new, m1_64, stck, 0) C(0xb27c, STCKF, S, SCF, la2, 0, new, m1_64, stck, 0) /* STORE CLOCK EXTENDED */ C(0xb278, STCKE, S, Z, 0, a2, 0, 0, stcke, 0) /* STORE CLOCK COMPARATOR */ - C(0xb207, STCKC, S, Z, la2, 0, new, m1_64a, 
stckc, 0) + F(0xb207, STCKC, S, Z, la2, 0, new, m1_64a, stckc, 0, IF_PRIV) /* STORE CONTROL */ - C(0xb600, STCTL, RS_a, Z, 0, a2, 0, 0, stctl, 0) - C(0xeb25, STCTG, RSY_a, Z, 0, a2, 0, 0, stctg, 0) + F(0xb600, STCTL, RS_a, Z, 0, a2, 0, 0, stctl, 0, IF_PRIV) + F(0xeb25, STCTG, RSY_a, Z, 0, a2, 0, 0, stctg, 0, IF_PRIV) /* STORE CPU ADDRESS */ - C(0xb212, STAP, S, Z, la2, 0, new, m1_16a, stap, 0) + F(0xb212, STAP, S, Z, la2, 0, new, m1_16a, stap, 0, IF_PRIV) /* STORE CPU ID */ - C(0xb202, STIDP, S, Z, la2, 0, new, m1_64a, stidp, 0) + F(0xb202, STIDP, S, Z, la2, 0, new, m1_64a, stidp, 0, IF_PRIV) /* STORE CPU TIMER */ - C(0xb209, STPT, S, Z, la2, 0, new, m1_64a, stpt, 0) + F(0xb209, STPT, S, Z, la2, 0, new, m1_64a, stpt, 0, IF_PRIV) /* STORE FACILITY LIST */ - C(0xb2b1, STFL, S, Z, 0, 0, 0, 0, stfl, 0) + F(0xb2b1, STFL, S, Z, 0, 0, 0, 0, stfl, 0, IF_PRIV) /* STORE PREFIX */ - C(0xb211, STPX, S, Z, la2, 0, new, m1_32a, stpx, 0) + F(0xb211, STPX, S, Z, la2, 0, new, m1_32a, stpx, 0, IF_PRIV) /* STORE SYSTEM INFORMATION */ - C(0xb27d, STSI, S, Z, 0, a2, 0, 0, stsi, 0) + F(0xb27d, STSI, S, Z, 0, a2, 0, 0, stsi, 0, IF_PRIV) /* STORE THEN AND SYSTEM MASK */ - C(0xac00, STNSM, SI, Z, la1, 0, 0, 0, stnosm, 0) + F(0xac00, STNSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) /* STORE THEN OR SYSTEM MASK */ - C(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0) + F(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) /* STORE USING REAL ADDRESS */ - C(0xb246, STURA, RRE, Z, r1_o, r2_o, 0, 0, stura, 0) - C(0xb925, STURG, RRE, Z, r1_o, r2_o, 0, 0, sturg, 0) + F(0xb246, STURA, RRE, Z, r1_o, r2_o, 0, 0, stura, 0, IF_PRIV) + F(0xb925, STURG, RRE, Z, r1_o, r2_o, 0, 0, sturg, 0, IF_PRIV) /* TEST BLOCK */ - C(0xb22c, TB, RRE, Z, 0, r2_o, 0, 0, testblock, 0) + F(0xb22c, TB, RRE, Z, 0, r2_o, 0, 0, testblock, 0, IF_PRIV) /* TEST PROTECTION */ C(0xe501, TPROT, SSE, Z, la1, a2, 0, 0, tprot, 0) /* CCW I/O Instructions */ - C(0xb276, XSCH, S, Z, 0, 0, 0, 0, xsch, 0) - C(0xb230, CSCH, S, Z, 0, 0, 0, 0, csch, 0) - C(0xb231, HSCH, S, Z, 0, 0, 0, 0, hsch, 0) - C(0xb232, MSCH, S, Z, 0, insn, 0, 0, msch, 0) - C(0xb23b, RCHP, S, Z, 0, 0, 0, 0, rchp, 0) - C(0xb238, RSCH, S, Z, 0, 0, 0, 0, rsch, 0) - C(0xb237, SAL, S, Z, 0, 0, 0, 0, sal, 0) - C(0xb23c, SCHM, S, Z, 0, insn, 0, 0, schm, 0) - C(0xb274, SIGA, S, Z, 0, 0, 0, 0, siga, 0) - C(0xb23a, STCPS, S, Z, 0, 0, 0, 0, stcps, 0) - C(0xb233, SSCH, S, Z, 0, insn, 0, 0, ssch, 0) - C(0xb239, STCRW, S, Z, 0, insn, 0, 0, stcrw, 0) - C(0xb234, STSCH, S, Z, 0, insn, 0, 0, stsch, 0) - C(0xb236, TPI , S, Z, la2, 0, 0, 0, tpi, 0) - C(0xb235, TSCH, S, Z, 0, insn, 0, 0, tsch, 0) + F(0xb276, XSCH, S, Z, 0, 0, 0, 0, xsch, 0, IF_PRIV) + F(0xb230, CSCH, S, Z, 0, 0, 0, 0, csch, 0, IF_PRIV) + F(0xb231, HSCH, S, Z, 0, 0, 0, 0, hsch, 0, IF_PRIV) + F(0xb232, MSCH, S, Z, 0, insn, 0, 0, msch, 0, IF_PRIV) + F(0xb23b, RCHP, S, Z, 0, 0, 0, 0, rchp, 0, IF_PRIV) + F(0xb238, RSCH, S, Z, 0, 0, 0, 0, rsch, 0, IF_PRIV) + F(0xb237, SAL, S, Z, 0, 0, 0, 0, sal, 0, IF_PRIV) + F(0xb23c, SCHM, S, Z, 0, insn, 0, 0, schm, 0, IF_PRIV) + F(0xb274, SIGA, S, Z, 0, 0, 0, 0, siga, 0, IF_PRIV) + F(0xb23a, STCPS, S, Z, 0, 0, 0, 0, stcps, 0, IF_PRIV) + F(0xb233, SSCH, S, Z, 0, insn, 0, 0, ssch, 0, IF_PRIV) + F(0xb239, STCRW, S, Z, 0, insn, 0, 0, stcrw, 0, IF_PRIV) + F(0xb234, STSCH, S, Z, 0, insn, 0, 0, stsch, 0, IF_PRIV) + F(0xb236, TPI , S, Z, la2, 0, 0, 0, tpi, 0, IF_PRIV) + F(0xb235, TSCH, S, Z, 0, insn, 0, 0, tsch, 0, IF_PRIV) /* ??? 
Not listed in PoO ninth edition, but there's a linux driver that uses it: "A CHSC subchannel is usually present on LPAR only." */ - C(0xb25f, CHSC, RRE, Z, 0, insn, 0, 0, chsc, 0) + F(0xb25f, CHSC, RRE, Z, 0, insn, 0, 0, chsc, 0, IF_PRIV) /* zPCI Instructions */ /* None of these instructions are documented in the PoP, so this is all based upon target/s390x/kvm.c and Linux code and likely incomplete */ - C(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0) - C(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0) - C(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0) - C(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0) - C(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0) - C(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0) - C(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0) - C(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0) + F(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0, IF_PRIV) + F(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0, IF_PRIV) + F(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0, IF_PRIV) + F(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0, IF_PRIV) + F(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0, IF_PRIV) + F(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0, IF_PRIV) + F(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0, IF_PRIV) + F(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0, IF_PRIV) #endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/interrupt.c b/target/s390x/interrupt.c index 25cfb3eef8..a17eff5ebc 100644 --- a/target/s390x/interrupt.c +++ b/target/s390x/interrupt.c @@ -15,6 +15,7 @@ #include "exec/exec-all.h" #include "sysemu/kvm.h" #include "hw/s390x/ioinst.h" +#include "tcg_s390x.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/s390_flic.h" #endif @@ -29,25 +30,11 @@ void trigger_pgm_exception(CPUS390XState *env, uint32_t code, uint32_t ilen) env->int_pgm_ilen = ilen; } -static void tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, - int ilen, uintptr_t ra) -{ -#ifdef CONFIG_TCG - trigger_pgm_exception(env, code, ilen); - cpu_loop_exit_restore(CPU(s390_env_get_cpu(env)), ra); -#else - g_assert_not_reached(); -#endif -} - void s390_program_interrupt(CPUS390XState *env, uint32_t code, int ilen, uintptr_t ra) { S390CPU *cpu = s390_env_get_cpu(env); - qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", - env->psw.addr); - if (kvm_enabled()) { kvm_s390_program_interrupt(cpu, code); } else if (tcg_enabled()) { diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 348e8cc546..2ebf26adfe 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -36,6 +36,7 @@ #include "qemu/timer.h" #include "qemu/units.h" #include "qemu/mmap-alloc.h" +#include "qemu/log.h" #include "sysemu/sysemu.h" #include "sysemu/hw_accel.h" #include "hw/hw.h" @@ -292,6 +293,12 @@ static int kvm_s390_configure_mempath_backing(KVMState *s) return 0; } + if (!hpage_1m_allowed()) { + error_report("This QEMU machine does not support huge page " + "mappings"); + return -EINVAL; + } + if (path_psize != 1 * MiB) { error_report("Memory backing with 2G pages was specified, " "but KVM does not support this memory backing"); @@ -1109,7 +1116,8 @@ void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code) .type = KVM_S390_PROGRAM_INT, .u.pgm.code = code, }; - + qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", + cpu->env.psw.addr); kvm_s390_vcpu_interrupt(cpu, &irq); } @@ -2291,11 +2299,26 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) error_setg(errp, "KVM: host CPU model could not be identified"); return; } + /* for now, 
we can only provide the AP feature with HW support */ + if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, + KVM_S390_VM_CRYPTO_ENABLE_APIE)) { + set_bit(S390_FEAT_AP, model->features); + } /* strip of features that are not part of the maximum model */ bitmap_and(model->features, model->features, model->def->full_feat, S390_FEAT_MAX); } +static void kvm_s390_configure_apie(bool interpret) +{ + uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE : + KVM_S390_VM_CRYPTO_DISABLE_APIE; + + if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) { + kvm_s390_set_attr(attr); + } +} + void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) { struct kvm_s390_vm_cpu_processor prop = { @@ -2345,6 +2368,10 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) if (test_bit(S390_FEAT_CMM, model->features)) { kvm_s390_enable_cmma(); } + + if (test_bit(S390_FEAT_AP, model->features)) { + kvm_s390_configure_apie(true); + } } void kvm_s390_restart_interrupt(S390CPU *cpu) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index bacae4f503..490c43e6e6 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" @@ -1379,65 +1380,62 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -static void do_cdsg(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3, bool parallel) +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) { uintptr_t ra = GETPC(); Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 oldv; + uint64_t oldh, oldl; bool fail; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); -#endif - } else { - uint64_t oldh, oldl; + check_alignment(env, addr, 16, ra); - check_alignment(env, addr, 16, ra); + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; } + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + env->cc_op = fail; env->regs[r1] = int128_gethi(oldv); env->regs[r1 + 1] = int128_getlo(oldv); } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - do_cdsg(env, addr, r1, r3, false); -} - void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, uint32_t r1, uint32_t r3) { - do_cdsg(env, addr, r1, r3, true); + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + 
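
[Aside on the mem_helper.c rework nearby: do_cdsg() is split into a serial HELPER(cdsg) and a HELPER(cdsg_parallel) that asserts HAVE_CMPXCHG128; the architectural behaviour is unchanged, only the atomicity guarantee differs. A rough host-side model of that behaviour, with invented names and no atomicity, just to make the condition-code and register updates explicit.]

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t hi, lo; } pair128;

    /* Model of CDSG: compare the 128-bit pair at *mem with *old_new;
     * on match store new_val (cc 0), otherwise return the memory
     * contents in *old_new (cc 1).  The real helpers also write the old
     * value back to regs[r1]/regs[r1+1], which is only visible when the
     * compare fails. */
    static int cdsg_model(pair128 *mem, pair128 *old_new, pair128 new_val)
    {
        bool equal = mem->hi == old_new->hi && mem->lo == old_new->lo;

        if (equal) {
            *mem = new_val;
        } else {
            *old_new = *mem;
        }
        return equal ? 0 : 1;
    }

    int main(void)
    {
        pair128 mem = { 1, 2 }, expect = { 1, 2 }, new_val = { 3, 4 };
        printf("cc=%d mem=%llu:%llu\n", cdsg_model(&mem, &expect, new_val),
               (unsigned long long)mem.hi, (unsigned long long)mem.lo);
        return 0;
    }
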
oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); } static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) uint32_t mem_idx = cpu_mmu_index(env, false); -#endif uintptr_t ra = GETPC(); uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t sc = extract32(env->regs[0], 8, 8); @@ -1465,18 +1463,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, probe_write(env, a2, 0, mem_idx, ra); #endif - /* Note that the compare-and-swap is atomic, and the store is atomic, but - the complete operation is not. Therefore we do not need to assert serial - context in order to implement this. That said, restart early if we can't - support either operation that is supposed to be atomic. */ + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ if (parallel) { - int mask = 0; -#if !defined(CONFIG_ATOMIC64) - mask = -8; -#elif !defined(CONFIG_ATOMIC128) - mask = -16; + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; #endif - if (((4 << fc) | (1 << sc)) & mask) { + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } @@ -1546,16 +1546,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 ov; - if (parallel) { -#ifdef CONFIG_ATOMIC128 - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); - cc = !int128_eq(ov, cv); -#else - /* Note that we asserted !parallel above. */ - g_assert_not_reached(); -#endif - } else { + if (!parallel) { uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); @@ -1567,6 +1558,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); } env->regs[r3 + 0] = int128_gethi(ov); @@ -1596,18 +1594,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a2, svh, ra); break; case 4: - if (parallel) { -#ifdef CONFIG_ATOMIC128 + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); Int128 sv = int128_make128(svl, svh); helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); -#else + } else { /* Note that we asserted !parallel above. 
*/ g_assert_not_reached(); -#endif - } else { - cpu_stq_data_ra(env, a2 + 0, svh, ra); - cpu_stq_data_ra(env, a2 + 8, svl, ra); } break; default: @@ -2100,76 +2096,64 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) #endif /* load pair from quadword */ -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) { uintptr_t ra = GETPC(); uint64_t hi, lo; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); - hi = int128_gethi(v); - lo = int128_getlo(v); -#endif - } else { - check_alignment(env, addr, 16, ra); - - hi = cpu_ldq_data_ra(env, addr + 0, ra); - lo = cpu_ldq_data_ra(env, addr + 8, ra); - } + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); env->retxl = lo; return hi; } -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) -{ - return do_lpq(env, addr, false); -} - uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) { - return do_lpq(env, addr, true); -} - -/* store pair to quadword */ -static void do_stpq(CPUS390XState *env, uint64_t addr, - uint64_t low, uint64_t high, bool parallel) -{ uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + assert(HAVE_ATOMIC128); - Int128 v = int128_make128(low, high); - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); -#endif - } else { - check_alignment(env, addr, 16, ra); + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); - cpu_stq_data_ra(env, addr + 0, high, ra); - cpu_stq_data_ra(env, addr + 8, low, ra); - } + env->retxl = lo; + return hi; } +/* store pair to quadword */ void HELPER(stpq)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, false); + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); } void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, true); + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); } /* Execute instruction. 
This instruction executes an insn modified with diff --git a/target/s390x/tcg-stub.c b/target/s390x/tcg-stub.c index c93501db0b..32adb7276a 100644 --- a/target/s390x/tcg-stub.c +++ b/target/s390x/tcg-stub.c @@ -18,3 +18,13 @@ void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) { } +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, + int ilen, uintptr_t ra) +{ + g_assert_not_reached(); +} +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra) +{ + g_assert_not_reached(); +} diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h index 4e308aa0ce..ab2c4ba703 100644 --- a/target/s390x/tcg_s390x.h +++ b/target/s390x/tcg_s390x.h @@ -14,5 +14,9 @@ #define TCG_S390X_H void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque); +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, + int ilen, uintptr_t ra); +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra); #endif /* TCG_S390X_H */ diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 7363aabf3a..b5bd56b7ee 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -44,6 +44,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" /* Information that (most) every instruction needs to manipulate. */ @@ -314,28 +315,18 @@ static inline void gen_illegal_opcode(DisasContext *s) gen_program_exception(s, PGM_OPERATION); } -static inline void gen_trap(DisasContext *s) +static inline void gen_data_exception(uint8_t dxc) { - TCGv_i32 t; - - /* Set DXC to 0xff. */ - t = tcg_temp_new_i32(); - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUS390XState, fpc)); - tcg_gen_ori_i32(t, t, 0xff00); - tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, fpc)); - tcg_temp_free_i32(t); - - gen_program_exception(s, PGM_DATA); + TCGv_i32 tmp = tcg_const_i32(dxc); + gen_helper_data_exception(cpu_env, tmp); + tcg_temp_free_i32(tmp); } -#ifndef CONFIG_USER_ONLY -static void check_privileged(DisasContext *s) +static inline void gen_trap(DisasContext *s) { - if (s->base.tb->flags & FLAG_MASK_PSTATE) { - gen_program_exception(s, PGM_PRIVILEGED); - } + /* Set DXC to 0xff */ + gen_data_exception(0xff); } -#endif static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2) { @@ -1120,19 +1111,37 @@ typedef struct { /* We are exiting the TB to the main loop. */ #define DISAS_PC_STALE_NOCHAIN DISAS_TARGET_4 + +/* Instruction flags */ +#define IF_AFP1 0x0001 /* r1 is a fp reg for HFP/FPS instructions */ +#define IF_AFP2 0x0002 /* r2 is a fp reg for HFP/FPS instructions */ +#define IF_AFP3 0x0004 /* r3 is a fp reg for HFP/FPS instructions */ +#define IF_BFP 0x0008 /* binary floating point instruction */ +#define IF_DFP 0x0010 /* decimal floating point instruction */ +#define IF_PRIV 0x0020 /* privileged instruction */ + struct DisasInsn { unsigned opc:16; + unsigned flags:16; DisasFormat fmt:8; unsigned fac:8; unsigned spec:8; const char *name; + /* Pre-process arguments before HELP_OP. */ void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. 
*/ DisasJumpType (*help_op)(DisasContext *, DisasOps *); uint64_t data; @@ -2032,6 +2041,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) int r3 = get_field(s->fields, r3); int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); + DisasJumpType ret = DISAS_NEXT; TCGv_i64 addr; TCGv_i32 t_r1, t_r3; @@ -2039,17 +2049,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) addr = get_address(s, 0, b2, d2); t_r1 = tcg_const_i32(r1); t_r3 = tcg_const_i32(r3); - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); } else { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; } tcg_temp_free_i64(addr); tcg_temp_free_i32(t_r1); tcg_temp_free_i32(t_r3); set_cc_static(s); - return DISAS_NEXT; + return ret; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -2078,7 +2091,6 @@ static DisasJumpType op_csp(DisasContext *s, DisasOps *o) /* Note that in1 = R1 (zero-extended expected value), out = R1 (original reg), out2 = R1+1 (new value). */ - check_privileged(s); addr = tcg_temp_new_i64(); old = tcg_temp_new_i64(); tcg_gen_andi_i64(addr, o->in2, -1ULL << (mop & MO_SIZE)); @@ -2202,7 +2214,6 @@ static DisasJumpType op_diag(DisasContext *s, DisasOps *o) TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); TCGv_i32 func_code = tcg_const_i32(get_field(s->fields, i2)); - check_privileged(s); gen_helper_diag(cpu_env, r1, r3, func_code); tcg_temp_free_i32(func_code); @@ -2463,7 +2474,6 @@ static DisasJumpType op_idte(DisasContext *s, DisasOps *o) { TCGv_i32 m4; - check_privileged(s); if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { m4 = tcg_const_i32(get_field(s->fields, m4)); } else { @@ -2478,7 +2488,6 @@ static DisasJumpType op_ipte(DisasContext *s, DisasOps *o) { TCGv_i32 m4; - check_privileged(s); if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { m4 = tcg_const_i32(get_field(s->fields, m4)); } else { @@ -2491,7 +2500,6 @@ static DisasJumpType op_ipte(DisasContext *s, DisasOps *o) static DisasJumpType op_iske(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_iske(o->out, cpu_env, o->in2); return DISAS_NEXT; } @@ -2790,7 +2798,6 @@ static DisasJumpType op_lctl(DisasContext *s, DisasOps *o) { TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); - check_privileged(s); gen_helper_lctl(cpu_env, r1, o->in2, r3); tcg_temp_free_i32(r1); tcg_temp_free_i32(r3); @@ -2802,7 +2809,6 @@ static DisasJumpType op_lctlg(DisasContext *s, DisasOps *o) { TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); - check_privileged(s); gen_helper_lctlg(cpu_env, r1, o->in2, r3); tcg_temp_free_i32(r1); tcg_temp_free_i32(r3); @@ -2812,7 +2818,6 @@ static DisasJumpType op_lctlg(DisasContext *s, DisasOps *o) static DisasJumpType op_lra(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_lra(o->out, cpu_env, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -2820,8 +2825,6 @@ static DisasJumpType op_lra(DisasContext *s, DisasOps *o) static DisasJumpType op_lpp(DisasContext *s, DisasOps *o) { - check_privileged(s); - tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp)); return DISAS_NEXT; } @@ -2830,12 +2833,12 @@ static DisasJumpType op_lpsw(DisasContext *s, DisasOps *o) { TCGv_i64 t1, t2; - check_privileged(s); 
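
[Aside on the check_privileged() deletions in the handlers below: with the new DisasInsn flags (IF_PRIV, IF_AFP1/2/3, IF_BFP, IF_DFP) and the CR0.AFP bit mirrored into the TB flags, privilege and FP availability can be decided once per instruction instead of inside each op_* handler. A standalone model of such a triage follows; it is a sketch, not the patch's translate_one() code. The IF_* values match the definitions above, the DXC codes 1/2/3 for AFP-register/BFP/DFP data exceptions follow the PoP convention, and the is_afp_reg() rule (f0/f2/f4/f6 always available) is an assumption of this model.]

    #include <stdbool.h>
    #include <stdio.h>

    #define IF_AFP1 0x0001
    #define IF_AFP2 0x0002
    #define IF_AFP3 0x0004
    #define IF_BFP  0x0008
    #define IF_DFP  0x0010
    #define IF_PRIV 0x0020

    enum fault { FAULT_NONE, FAULT_PRIVILEGED, FAULT_DATA };

    /* f0, f2, f4 and f6 are always usable; the remaining FP registers are
     * "additional" ones that require CR0.AFP (assumption of this model). */
    static bool is_afp_reg(unsigned reg)
    {
        return reg % 2 || reg > 6;
    }

    /* Decide, from the instruction flags and the dynamic state mirrored in
     * the TB flags, whether the insn may run, needs a privileged-operation
     * exception, or needs a data exception (DXC returned via *dxc). */
    static enum fault triage(unsigned flags, bool problem_state, bool cr0_afp,
                             unsigned r1, unsigned r2, unsigned r3,
                             unsigned *dxc)
    {
        if ((flags & IF_PRIV) && problem_state) {
            return FAULT_PRIVILEGED;
        }
        if (!cr0_afp) {
            if (((flags & IF_AFP1) && is_afp_reg(r1)) ||
                ((flags & IF_AFP2) && is_afp_reg(r2)) ||
                ((flags & IF_AFP3) && is_afp_reg(r3))) {
                *dxc = 1;                 /* AFP register */
                return FAULT_DATA;
            }
            if (flags & IF_BFP) {
                *dxc = 2;                 /* BFP instruction */
                return FAULT_DATA;
            }
            if (flags & IF_DFP) {
                *dxc = 3;                 /* DFP instruction */
                return FAULT_DATA;
            }
        }
        return FAULT_NONE;
    }

    int main(void)
    {
        unsigned dxc = 0;
        /* LDR f1,f2 with CR0.AFP = 0: r1 is an additional register -> DXC 1 */
        enum fault f = triage(IF_AFP1 | IF_AFP2, false, false, 1, 2, 0, &dxc);
        printf("fault=%d dxc=%u\n", (int)f, dxc);
        return 0;
    }
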
per_breaking_event(s); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEUL | MO_ALIGN_8); tcg_gen_addi_i64(o->in2, o->in2, 4); tcg_gen_qemu_ld32u(t2, o->in2, get_mem_index(s)); /* Convert the 32-bit PSW_MASK into the 64-bit PSW_MASK. */ @@ -2850,12 +2853,12 @@ static DisasJumpType op_lpswe(DisasContext *s, DisasOps *o) { TCGv_i64 t1, t2; - check_privileged(s); per_breaking_event(s); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - tcg_gen_qemu_ld64(t1, o->in2, get_mem_index(s)); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEQ | MO_ALIGN_8); tcg_gen_addi_i64(o->in2, o->in2, 8); tcg_gen_qemu_ld64(t2, o->in2, get_mem_index(s)); gen_helper_load_psw(cpu_env, t1, t2); @@ -3036,10 +3039,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { gen_helper_lpq_parallel(o->out, cpu_env, o->in2); } else { - gen_helper_lpq(o->out, cpu_env, o->in2); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return_low128(o->out2); return DISAS_NEXT; @@ -3048,14 +3054,12 @@ static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) #ifndef CONFIG_USER_ONLY static DisasJumpType op_lura(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_lura(o->out, cpu_env, o->in2); return DISAS_NEXT; } static DisasJumpType op_lurag(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_lurag(o->out, cpu_env, o->in2); return DISAS_NEXT; } @@ -3214,7 +3218,6 @@ static DisasJumpType op_mvcos(DisasContext *s, DisasOps *o) static DisasJumpType op_mvcp(DisasContext *s, DisasOps *o) { int r1 = get_field(s->fields, l1); - check_privileged(s); gen_helper_mvcp(cc_op, cpu_env, regs[r1], o->addr1, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -3223,7 +3226,6 @@ static DisasJumpType op_mvcp(DisasContext *s, DisasOps *o) static DisasJumpType op_mvcs(DisasContext *s, DisasOps *o) { int r1 = get_field(s->fields, l1); - check_privileged(s); gen_helper_mvcs(cc_op, cpu_env, regs[r1], o->addr1, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -3509,7 +3511,6 @@ static DisasJumpType op_popcnt(DisasContext *s, DisasOps *o) #ifndef CONFIG_USER_ONLY static DisasJumpType op_ptlb(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_ptlb(cpu_env); return DISAS_NEXT; } @@ -3700,7 +3701,6 @@ static DisasJumpType op_rll64(DisasContext *s, DisasOps *o) #ifndef CONFIG_USER_ONLY static DisasJumpType op_rrbe(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_rrbe(cc_op, cpu_env, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -3708,7 +3708,6 @@ static DisasJumpType op_rrbe(DisasContext *s, DisasOps *o) static DisasJumpType op_sacf(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sacf(cpu_env, o->in2); /* Addressing mode has changed, so end the block. 
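Ending the TB ensures the instructions that follow are retranslated with the updated address-space controls.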
*/ return DISAS_PC_STALE; @@ -3798,7 +3797,6 @@ static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o) #ifndef CONFIG_USER_ONLY static DisasJumpType op_servc(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_servc(cc_op, cpu_env, o->in2, o->in1); set_cc_static(s); return DISAS_NEXT; @@ -3808,7 +3806,6 @@ static DisasJumpType op_sigp(DisasContext *s, DisasOps *o) { TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); - check_privileged(s); gen_helper_sigp(cc_op, cpu_env, o->in2, r1, r3); set_cc_static(s); tcg_temp_free_i32(r1); @@ -3990,7 +3987,6 @@ static DisasJumpType op_ectg(DisasContext *s, DisasOps *o) #ifndef CONFIG_USER_ONLY static DisasJumpType op_spka(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_shri_i64(o->in2, o->in2, 4); tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY, 4); return DISAS_NEXT; @@ -3998,14 +3994,12 @@ static DisasJumpType op_spka(DisasContext *s, DisasOps *o) static DisasJumpType op_sske(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sske(cpu_env, o->in1, o->in2); return DISAS_NEXT; } static DisasJumpType op_ssm(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, 56, 8); /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */ return DISAS_PC_STALE_NOCHAIN; @@ -4013,7 +4007,6 @@ static DisasJumpType op_ssm(DisasContext *s, DisasOps *o) static DisasJumpType op_stap(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, core_id)); return DISAS_NEXT; } @@ -4055,7 +4048,6 @@ static DisasJumpType op_stcke(DisasContext *s, DisasOps *o) static DisasJumpType op_sck(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN); gen_helper_sck(cc_op, cpu_env, o->in1); set_cc_static(s); @@ -4064,21 +4056,18 @@ static DisasJumpType op_sck(DisasContext *s, DisasOps *o) static DisasJumpType op_sckc(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sckc(cpu_env, o->in2); return DISAS_NEXT; } static DisasJumpType op_sckpf(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sckpf(cpu_env, regs[0]); return DISAS_NEXT; } static DisasJumpType op_stckc(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stckc(o->out, cpu_env); return DISAS_NEXT; } @@ -4087,7 +4076,6 @@ static DisasJumpType op_stctg(DisasContext *s, DisasOps *o) { TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); - check_privileged(s); gen_helper_stctg(cpu_env, r1, o->in2, r3); tcg_temp_free_i32(r1); tcg_temp_free_i32(r3); @@ -4098,7 +4086,6 @@ static DisasJumpType op_stctl(DisasContext *s, DisasOps *o) { TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); - check_privileged(s); gen_helper_stctl(cpu_env, r1, o->in2, r3); tcg_temp_free_i32(r1); tcg_temp_free_i32(r3); @@ -4107,35 +4094,30 @@ static DisasJumpType op_stctl(DisasContext *s, DisasOps *o) static DisasJumpType op_stidp(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, cpuid)); return DISAS_NEXT; } static DisasJumpType op_spt(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_spt(cpu_env, o->in2); return DISAS_NEXT; } static DisasJumpType op_stfl(DisasContext *s, DisasOps *o) { - check_privileged(s); 
gen_helper_stfl(cpu_env); return DISAS_NEXT; } static DisasJumpType op_stpt(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stpt(o->out, cpu_env); return DISAS_NEXT; } static DisasJumpType op_stsi(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stsi(cc_op, cpu_env, o->in2, regs[0], regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4143,14 +4125,12 @@ static DisasJumpType op_stsi(DisasContext *s, DisasOps *o) static DisasJumpType op_spx(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_spx(cpu_env, o->in2); return DISAS_NEXT; } static DisasJumpType op_xsch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_xsch(cpu_env, regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4158,7 +4138,6 @@ static DisasJumpType op_xsch(DisasContext *s, DisasOps *o) static DisasJumpType op_csch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_csch(cpu_env, regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4166,7 +4145,6 @@ static DisasJumpType op_csch(DisasContext *s, DisasOps *o) static DisasJumpType op_hsch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_hsch(cpu_env, regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4174,7 +4152,6 @@ static DisasJumpType op_hsch(DisasContext *s, DisasOps *o) static DisasJumpType op_msch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_msch(cpu_env, regs[1], o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4182,7 +4159,6 @@ static DisasJumpType op_msch(DisasContext *s, DisasOps *o) static DisasJumpType op_rchp(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_rchp(cpu_env, regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4190,7 +4166,6 @@ static DisasJumpType op_rchp(DisasContext *s, DisasOps *o) static DisasJumpType op_rsch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_rsch(cpu_env, regs[1]); set_cc_static(s); return DISAS_NEXT; @@ -4198,21 +4173,18 @@ static DisasJumpType op_rsch(DisasContext *s, DisasOps *o) static DisasJumpType op_sal(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sal(cpu_env, regs[1]); return DISAS_NEXT; } static DisasJumpType op_schm(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_schm(cpu_env, regs[1], regs[2], o->in2); return DISAS_NEXT; } static DisasJumpType op_siga(DisasContext *s, DisasOps *o) { - check_privileged(s); /* From KVM code: Not provided, set CC = 3 for subchannel not operational */ gen_op_movi_cc(s, 3); return DISAS_NEXT; @@ -4220,14 +4192,12 @@ static DisasJumpType op_siga(DisasContext *s, DisasOps *o) static DisasJumpType op_stcps(DisasContext *s, DisasOps *o) { - check_privileged(s); /* The instruction is suppressed if not provided. 
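Returning DISAS_NEXT with no side effects implements that suppression as a no-op.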
*/ return DISAS_NEXT; } static DisasJumpType op_ssch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_ssch(cpu_env, regs[1], o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4235,7 +4205,6 @@ static DisasJumpType op_ssch(DisasContext *s, DisasOps *o) static DisasJumpType op_stsch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stsch(cpu_env, regs[1], o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4243,7 +4212,6 @@ static DisasJumpType op_stsch(DisasContext *s, DisasOps *o) static DisasJumpType op_stcrw(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stcrw(cpu_env, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4251,7 +4219,6 @@ static DisasJumpType op_stcrw(DisasContext *s, DisasOps *o) static DisasJumpType op_tpi(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_tpi(cc_op, cpu_env, o->addr1); set_cc_static(s); return DISAS_NEXT; @@ -4259,7 +4226,6 @@ static DisasJumpType op_tpi(DisasContext *s, DisasOps *o) static DisasJumpType op_tsch(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_tsch(cpu_env, regs[1], o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4267,7 +4233,6 @@ static DisasJumpType op_tsch(DisasContext *s, DisasOps *o) static DisasJumpType op_chsc(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_chsc(cpu_env, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4275,7 +4240,6 @@ static DisasJumpType op_chsc(DisasContext *s, DisasOps *o) static DisasJumpType op_stpx(DisasContext *s, DisasOps *o) { - check_privileged(s); tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, psa)); tcg_gen_andi_i64(o->out, o->out, 0x7fffe000); return DISAS_NEXT; @@ -4286,8 +4250,6 @@ static DisasJumpType op_stnosm(DisasContext *s, DisasOps *o) uint64_t i2 = get_field(s->fields, i2); TCGv_i64 t; - check_privileged(s); - /* It is important to do what the instruction name says: STORE THEN. If we let the output hook perform the store then if we fault and restart, we'll have the wrong SYSTEM MASK in place. 
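Hence the current system mask is stored to memory first, before psw_mask is modified.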
*/ @@ -4309,14 +4271,12 @@ static DisasJumpType op_stnosm(DisasContext *s, DisasOps *o) static DisasJumpType op_stura(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_stura(cpu_env, o->in2, o->in1); return DISAS_NEXT; } static DisasJumpType op_sturg(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sturg(cpu_env, o->in2, o->in1); return DISAS_NEXT; } @@ -4462,10 +4422,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); } else { - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return DISAS_NEXT; } @@ -4582,7 +4545,6 @@ static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o) static DisasJumpType op_testblock(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_testblock(cc_op, cpu_env, o->in2); set_cc_static(s); return DISAS_NEXT; @@ -4840,7 +4802,6 @@ static DisasJumpType op_clp(DisasContext *s, DisasOps *o) { TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2)); - check_privileged(s); gen_helper_clp(cpu_env, r2); tcg_temp_free_i32(r2); set_cc_static(s); @@ -4852,7 +4813,6 @@ static DisasJumpType op_pcilg(DisasContext *s, DisasOps *o) TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2)); - check_privileged(s); gen_helper_pcilg(cpu_env, r1, r2); tcg_temp_free_i32(r1); tcg_temp_free_i32(r2); @@ -4865,7 +4825,6 @@ static DisasJumpType op_pcistg(DisasContext *s, DisasOps *o) TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2)); - check_privileged(s); gen_helper_pcistg(cpu_env, r1, r2); tcg_temp_free_i32(r1); tcg_temp_free_i32(r2); @@ -4878,7 +4837,6 @@ static DisasJumpType op_stpcifc(DisasContext *s, DisasOps *o) TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2)); - check_privileged(s); gen_helper_stpcifc(cpu_env, r1, o->addr1, ar); tcg_temp_free_i32(ar); tcg_temp_free_i32(r1); @@ -4888,7 +4846,6 @@ static DisasJumpType op_stpcifc(DisasContext *s, DisasOps *o) static DisasJumpType op_sic(DisasContext *s, DisasOps *o) { - check_privileged(s); gen_helper_sic(cpu_env, o->in1, o->in2); return DISAS_NEXT; } @@ -4898,7 +4855,6 @@ static DisasJumpType op_rpcit(DisasContext *s, DisasOps *o) TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2)); - check_privileged(s); gen_helper_rpcit(cpu_env, r1, r2); tcg_temp_free_i32(r1); tcg_temp_free_i32(r2); @@ -4912,7 +4868,6 @@ static DisasJumpType op_pcistb(DisasContext *s, DisasOps *o) TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3)); TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2)); - check_privileged(s); gen_helper_pcistb(cpu_env, r1, r3, o->addr1, ar); tcg_temp_free_i32(ar); tcg_temp_free_i32(r1); @@ -4926,7 +4881,6 @@ static DisasJumpType op_mpcifc(DisasContext *s, DisasOps *o) TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1)); TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2)); - check_privileged(s); gen_helper_mpcifc(cpu_env, r1, o->addr1, ar); tcg_temp_free_i32(ar); tcg_temp_free_i32(r1); @@ -5834,17 +5788,24 @@ static void in2_insn(DisasContext *s, DisasFields *f, DisasOps *o) search tree, rather 
than us having to post-process the table. */ #define C(OPC, NM, FT, FC, I1, I2, P, W, OP, CC) \ - D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0) + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, 0) + +#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, 0) -#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) insn_ ## NM, +#define F(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, FL) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, FL) + +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) insn_ ## NM, enum DisasInsnEnum { #include "insn-data.def" }; -#undef D -#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) { \ +#undef E +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) { \ .opc = OPC, \ + .flags = FL, \ .fmt = FMT_##FT, \ .fac = FAC_##FC, \ .spec = SPEC_in1_##I1 | SPEC_in2_##I2 | SPEC_prep_##P | SPEC_wout_##W, \ @@ -5915,8 +5876,8 @@ static const DisasInsn insn_info[] = { #include "insn-data.def" }; -#undef D -#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \ +#undef E +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) \ case OPC: return &insn_info[insn_ ## NM]; static const DisasInsn *lookup_opc(uint16_t opc) @@ -5928,6 +5889,8 @@ static const DisasInsn *lookup_opc(uint16_t opc) } } +#undef F +#undef E #undef D #undef C @@ -6075,6 +6038,17 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s, return info; } +static bool is_afp_reg(int reg) +{ + return reg % 2 || reg > 6; +} + +static bool is_fp_pair(int reg) +{ + /* 0,1,4,5,8,9,12,13: to exclude the others, check for single bit */ + return !(reg & 0x2); +} + static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) { const DisasInsn *insn; @@ -6101,42 +6075,48 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) } #endif - /* Check for insn specification exceptions. */ - if (insn->spec) { - int spec = insn->spec, excp = 0, r; + /* process flags */ + if (insn->flags) { + /* privileged instruction */ + if ((s->base.tb->flags & FLAG_MASK_PSTATE) && (insn->flags & IF_PRIV)) { + gen_program_exception(s, PGM_PRIVILEGED); + return DISAS_NORETURN; + } - if (spec & SPEC_r1_even) { - r = get_field(&f, r1); - if (r & 1) { - excp = PGM_SPECIFICATION; + /* if AFP is not enabled, instructions and registers are forbidden */ + if (!(s->base.tb->flags & FLAG_MASK_AFP)) { + uint8_t dxc = 0; + + if ((insn->flags & IF_AFP1) && is_afp_reg(get_field(&f, r1))) { + dxc = 1; } - } - if (spec & SPEC_r2_even) { - r = get_field(&f, r2); - if (r & 1) { - excp = PGM_SPECIFICATION; + if ((insn->flags & IF_AFP2) && is_afp_reg(get_field(&f, r2))) { + dxc = 1; } - } - if (spec & SPEC_r3_even) { - r = get_field(&f, r3); - if (r & 1) { - excp = PGM_SPECIFICATION; + if ((insn->flags & IF_AFP3) && is_afp_reg(get_field(&f, r3))) { + dxc = 1; } - } - if (spec & SPEC_r1_f128) { - r = get_field(&f, r1); - if (r > 13) { - excp = PGM_SPECIFICATION; + if (insn->flags & IF_BFP) { + dxc = 2; } - } - if (spec & SPEC_r2_f128) { - r = get_field(&f, r2); - if (r > 13) { - excp = PGM_SPECIFICATION; + if (insn->flags & IF_DFP) { + dxc = 3; + } + if (dxc) { + gen_data_exception(dxc); + return DISAS_NORETURN; } } - if (excp) { - gen_program_exception(s, excp); + } + + /* Check for insn specification exceptions. 
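These all raise PGM_SPECIFICATION: an odd register number where an even/odd register pair is required, or an FP register that cannot start a 128-bit register pair.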
*/ + if (insn->spec) { + if ((insn->spec & SPEC_r1_even && get_field(&f, r1) & 1) || + (insn->spec & SPEC_r2_even && get_field(&f, r2) & 1) || + (insn->spec & SPEC_r3_even && get_field(&f, r3) & 1) || + (insn->spec & SPEC_r1_f128 && !is_fp_pair(get_field(&f, r1))) || + (insn->spec & SPEC_r2_f128 && !is_fp_pair(get_field(&f, r2)))) { + gen_program_exception(s, PGM_SPECIFICATION); return DISAS_NORETURN; } } @@ -6164,11 +6144,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (insn->help_op) { ret = insn->help_op(s, &o); } - if (insn->help_wout) { - insn->help_wout(s, &f, &o); - } - if (insn->help_cout) { - insn->help_cout(s, &o); + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &f, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } } /* Free any temporaries created by the helpers. */ diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index 68f978d80b..2b49d1ca40 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -116,8 +116,6 @@ static void uc32_cpu_initfn(Object *obj) env->uncached_asr = ASR_MODE_PRIV; env->regs[31] = 0x03000000; #endif - - tlb_flush(cs); } static const VMStateDescription vmstate_uc32_cpu = { |
