Diffstat (limited to 'target')
47 files changed, 5010 insertions, 2233 deletions
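The substance of this diff is the move from ARM_FEATURE_* flag bits (e.g. ARM_FEATURE_ARM_DIV, ARM_FEATURE_V8_AES, ARM_FEATURE_SVE) to predicates computed from the ID registers, which are now grouped into the ARMISARegisters substructure and queried via cpu_isar_feature(name, cpu). As an illustration only (a standalone sketch, not QEMU code), the snippet below models how isar_feature_thumb_div()/isar_feature_arm_div() decide divide support from ID_ISAR0.DIVIDE, assuming that field occupies bits [27:24] as in the ARM ARM; the 0x02101110 value is the one cortex_a15_initfn() assigns in the patch below.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for QEMU's FIELD_EX32(): extract an unsigned bitfield from a 32-bit ID register. */
static inline uint32_t extract32_field(uint32_t reg, unsigned shift, unsigned len)
{
    return (reg >> shift) & ((1u << len) - 1);
}

/* Mirrors isar_feature_thumb_div()/isar_feature_arm_div() from the patch:
 * ID_ISAR0.DIVIDE == 1 means SDIV/UDIV in Thumb only, == 2 means Thumb and ARM encodings. */
static bool feature_thumb_div(uint32_t id_isar0)
{
    return extract32_field(id_isar0, 24, 4) != 0;
}

static bool feature_arm_div(uint32_t id_isar0)
{
    return extract32_field(id_isar0, 24, 4) > 1;
}

int main(void)
{
    uint32_t cortex_a15_id_isar0 = 0x02101110; /* value set by cortex_a15_initfn() in this diff */

    printf("thumb div: %d, arm div: %d\n",
           feature_thumb_div(cortex_a15_id_isar0),
           feature_arm_div(cortex_a15_id_isar0));
    return 0;
}

In the real patch the same test is reached through the cpu_isar_feature() macro, e.g. helper.c replaces arm_feature(env, ARM_FEATURE_SVE) with cpu_isar_feature(aa64_sve, cpu).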
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index b08078e7fc..a953897fcc 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -201,7 +201,6 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - tlb_flush(cs); env->lock_addr = -1; #if defined(CONFIG_USER_ONLY) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index cd48ad42d8..8f16e96b6c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -144,9 +144,9 @@ static void arm_cpu_reset(CPUState *s) g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; - env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0; - env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1; - env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2; + env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0; + env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; s->halted = cpu->start_powered_off; @@ -814,7 +814,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) /* Some features automatically imply others: */ if (arm_feature(env, ARM_FEATURE_V8)) { - set_feature(env, ARM_FEATURE_V7VE); + if (arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_V7); + } else { + set_feature(env, ARM_FEATURE_V7VE); + } } if (arm_feature(env, ARM_FEATURE_V7VE)) { /* v7 Virtualization Extensions. In real hardware this implies @@ -825,7 +829,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Presence of EL2 itself is ARM_FEATURE_EL2, and of the * Security Extensions is ARM_FEATURE_EL3. */ - set_feature(env, ARM_FEATURE_ARM_DIV); + assert(cpu_isar_feature(arm_div, cpu)); set_feature(env, ARM_FEATURE_LPAE); set_feature(env, ARM_FEATURE_V7); } @@ -850,20 +854,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (arm_feature(env, ARM_FEATURE_V6)) { set_feature(env, ARM_FEATURE_V5); - set_feature(env, ARM_FEATURE_JAZELLE); if (!arm_feature(env, ARM_FEATURE_M)) { + assert(cpu_isar_feature(jazelle, cpu)); set_feature(env, ARM_FEATURE_AUXCR); } } if (arm_feature(env, ARM_FEATURE_V5)) { set_feature(env, ARM_FEATURE_V4T); } - if (arm_feature(env, ARM_FEATURE_M)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } - if (arm_feature(env, ARM_FEATURE_ARM_DIV)) { - set_feature(env, ARM_FEATURE_THUMB_DIV); - } if (arm_feature(env, ARM_FEATURE_VFP4)) { set_feature(env, ARM_FEATURE_VFP3); set_feature(env, ARM_FEATURE_VFP_FP16); @@ -938,7 +936,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. */ cpu->id_pfr1 &= ~0xf0; - cpu->id_aa64pfr0 &= ~0xf000; + cpu->isar.id_aa64pfr0 &= ~0xf000; } if (!cpu->has_el2) { @@ -955,7 +953,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * registers if we don't have EL2. These are id_pfr1[15:12] and * id_aa64pfr0_el1[11:8]. */ - cpu->id_aa64pfr0 &= ~0xf00; + cpu->isar.id_aa64pfr0 &= ~0xf00; cpu->id_pfr1 &= ~0xf000; } @@ -1084,11 +1082,16 @@ static void arm926_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x41069265; cpu->reset_fpsid = 0x41011090; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. 
+ */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); } static void arm946_initfn(Object *obj) @@ -1114,12 +1117,18 @@ static void arm1026_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_AUXCR); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); - set_feature(&cpu->env, ARM_FEATURE_JAZELLE); cpu->midr = 0x4106a262; cpu->reset_fpsid = 0x410110a0; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00090078; cpu->reset_auxcr = 1; + + /* + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ + cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + { /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ ARMCPRegInfo ifar = { @@ -1151,8 +1160,8 @@ static void arm1136_r2_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4107b362; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1162,11 +1171,11 @@ static void arm1136_r2_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1183,8 +1192,8 @@ static void arm1136_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); cpu->midr = 0x4117b363; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1194,11 +1203,11 @@ static void arm1136_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222110; - cpu->id_isar0 = 0x00140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231111; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 7; } @@ -1216,8 +1225,8 @@ static void arm1176_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fb767; cpu->reset_fpsid = 0x410120b5; - cpu->mvfr0 = 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; cpu->id_pfr0 = 0x111; @@ -1227,11 +1236,11 @@ static void arm1176_initfn(Object *obj) cpu->id_mmfr0 = 0x01130003; cpu->id_mmfr1 = 0x10030302; cpu->id_mmfr2 = 0x01222100; - cpu->id_isar0 = 0x0140011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11231121; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x01141; + cpu->isar.id_isar0 = 0x0140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231121; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x01141; cpu->reset_auxcr = 7; } @@ -1247,8 +1256,8 @@ static void arm11mpcore_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); cpu->midr = 0x410fb022; cpu->reset_fpsid = 0x410120b4; - cpu->mvfr0 
= 0x11111111; - cpu->mvfr1 = 0x00000000; + cpu->isar.mvfr0 = 0x11111111; + cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ cpu->id_pfr0 = 0x111; cpu->id_pfr1 = 0x1; @@ -1257,11 +1266,11 @@ static void arm11mpcore_initfn(Object *obj) cpu->id_mmfr0 = 0x01100103; cpu->id_mmfr1 = 0x10020302; cpu->id_mmfr2 = 0x01222000; - cpu->id_isar0 = 0x00100011; - cpu->id_isar1 = 0x12002111; - cpu->id_isar2 = 0x11221011; - cpu->id_isar3 = 0x01102131; - cpu->id_isar4 = 0x141; + cpu->isar.id_isar0 = 0x00100011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11221011; + cpu->isar.id_isar3 = 0x01102131; + cpu->isar.id_isar4 = 0x141; cpu->reset_auxcr = 1; } @@ -1290,13 +1299,13 @@ static void cortex_m3_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m4_initfn(Object *obj) @@ -1317,13 +1326,13 @@ static void cortex_m4_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x00000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01141110; - cpu->id_isar1 = 0x02111000; - cpu->id_isar2 = 0x21112231; - cpu->id_isar3 = 0x01111110; - cpu->id_isar4 = 0x01310102; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; } static void cortex_m33_initfn(Object *obj) @@ -1346,13 +1355,13 @@ static void cortex_m33_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01000000; cpu->id_mmfr3 = 0x00000000; - cpu->id_isar0 = 0x01101110; - cpu->id_isar1 = 0x02212000; - cpu->id_isar2 = 0x20232232; - cpu->id_isar3 = 0x01111131; - cpu->id_isar4 = 0x01310132; - cpu->id_isar5 = 0x00000000; - cpu->id_isar6 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; cpu->clidr = 0x00000000; cpu->ctr = 0x8000c000; } @@ -1384,8 +1393,6 @@ static void cortex_r5_initfn(Object *obj) ARMCPU *cpu = ARM_CPU(obj); set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV); - set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); cpu->midr = 0x411fc153; /* r1p3 */ @@ -1397,13 +1404,13 @@ static void cortex_r5_initfn(Object *obj) cpu->id_mmfr1 = 0x00000000; cpu->id_mmfr2 = 0x01200000; cpu->id_mmfr3 = 0x0211; - cpu->id_isar0 = 0x02101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232141; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x0010142; - cpu->id_isar5 = 0x0; - cpu->id_isar6 = 0x0; + cpu->isar.id_isar0 = 0x02101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232141; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x0010142; + cpu->isar.id_isar5 = 0x0; + cpu->isar.id_isar6 
= 0x0; cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; define_arm_cp_regs(cpu, cortexr5_cp_reginfo); @@ -1438,8 +1445,8 @@ static void cortex_a8_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL3); cpu->midr = 0x410fc080; cpu->reset_fpsid = 0x410330c0; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x00011111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1450,11 +1457,11 @@ static void cortex_a8_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01202000; cpu->id_mmfr3 = 0x11; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x12112111; - cpu->id_isar2 = 0x21232031; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x12112111; + cpu->isar.id_isar2 = 0x21232031; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x15141000; cpu->clidr = (1 << 27) | (2 << 24) | 3; cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ @@ -1512,8 +1519,8 @@ static void cortex_a9_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CBAR); cpu->midr = 0x410fc090; cpu->reset_fpsid = 0x41033090; - cpu->mvfr0 = 0x11110222; - cpu->mvfr1 = 0x01111111; + cpu->isar.mvfr0 = 0x11110222; + cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x1031; @@ -1524,11 +1531,11 @@ static void cortex_a9_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01230000; cpu->id_mmfr3 = 0x00002111; - cpu->id_isar0 = 0x00101111; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x00111142; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; cpu->dbgdidr = 0x35141000; cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ @@ -1573,8 +1580,8 @@ static void cortex_a7_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; cpu->midr = 0x410fc075; cpu->reset_fpsid = 0x41023075; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1590,11 +1597,11 @@ static void cortex_a7_initfn(Object *obj) /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. 
*/ - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f005; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1619,8 +1626,8 @@ static void cortex_a15_initfn(Object *obj) cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; cpu->midr = 0x412fc0f1; cpu->reset_fpsid = 0x410430f0; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x11111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; cpu->id_pfr0 = 0x00001131; @@ -1633,11 +1640,11 @@ static void cortex_a15_initfn(Object *obj) cpu->id_mmfr1 = 0x20000000; cpu->id_mmfr2 = 0x01240000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232041; - cpu->id_isar3 = 0x11112131; - cpu->id_isar4 = 0x10011142; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; cpu->dbgdidr = 0x3515f021; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ @@ -1830,17 +1837,26 @@ static void arm_max_initfn(Object *obj) cortex_a15_initfn(obj); #ifdef CONFIG_USER_ONLY /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, + * since we don't correctly set (all of) the ID registers to + * advertise them. */ set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); + { + uint32_t t; + + t = cpu->isar.id_isar5; + t = FIELD_DP32(t, ID_ISAR5, AES, 2); + t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); + t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); + t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); + t = FIELD_DP32(t, ID_ISAR5, RDM, 1); + t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = t; + + t = cpu->isar.id_isar6; + t = FIELD_DP32(t, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = t; + } #endif } } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f00c0444c4..8e6779936e 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -531,6 +531,13 @@ typedef struct CPUARMState { */ } exception; + /* Information associated with an SError */ + struct { + uint8_t pending; + uint8_t has_esr; + uint64_t esr; + } serror; + /* Thumb-2 EE state. */ uint32_t teecr; uint32_t teehbr; @@ -669,6 +676,8 @@ typedef enum ARMPSCIState { PSCI_ON_PENDING = 2 } ARMPSCIState; +typedef struct ARMISARegisters ARMISARegisters; + /** * ARMCPU: * @env: #CPUARMState @@ -788,13 +797,28 @@ struct ARMCPU { * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix * is used for reset values of non-constant registers; no reset_ * prefix means a constant register. + * Some of these registers are split out into a substructure that + * is shared with the translators to control the ISA. 
*/ + struct ARMISARegisters { + uint32_t id_isar0; + uint32_t id_isar1; + uint32_t id_isar2; + uint32_t id_isar3; + uint32_t id_isar4; + uint32_t id_isar5; + uint32_t id_isar6; + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + } isar; uint32_t midr; uint32_t revidr; uint32_t reset_fpsid; - uint32_t mvfr0; - uint32_t mvfr1; - uint32_t mvfr2; uint32_t ctr; uint32_t reset_sctlr; uint32_t id_pfr0; @@ -808,21 +832,10 @@ struct ARMCPU { uint32_t id_mmfr2; uint32_t id_mmfr3; uint32_t id_mmfr4; - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64afr0; uint64_t id_aa64afr1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint32_t dbgdidr; @@ -1531,6 +1544,16 @@ FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) FIELD(ID_AA64ISAR1, SB, 36, 4) FIELD(ID_AA64ISAR1, SPECRES, 40, 4) +FIELD(ID_AA64PFR0, EL0, 0, 4) +FIELD(ID_AA64PFR0, EL1, 4, 4) +FIELD(ID_AA64PFR0, EL2, 8, 4) +FIELD(ID_AA64PFR0, EL3, 12, 4) +FIELD(ID_AA64PFR0, FP, 16, 4) +FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) +FIELD(ID_AA64PFR0, GIC, 24, 4) +FIELD(ID_AA64PFR0, RAS, 28, 4) +FIELD(ID_AA64PFR0, SVE, 32, 4) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -1550,7 +1573,6 @@ enum arm_features { ARM_FEATURE_VFP3, ARM_FEATURE_VFP_FP16, ARM_FEATURE_NEON, - ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */ ARM_FEATURE_M, /* Microcontroller profile. */ ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. 
*/ ARM_FEATURE_THUMB2EE, @@ -1560,7 +1582,6 @@ enum arm_features { ARM_FEATURE_V5, ARM_FEATURE_STRONGARM, ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */ - ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */ ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */ ARM_FEATURE_GENERIC_TIMER, ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */ @@ -1573,30 +1594,15 @@ enum arm_features { ARM_FEATURE_LPAE, /* has Large Physical Address Extension */ ARM_FEATURE_V8, ARM_FEATURE_AARCH64, /* supports 64 bit mode */ - ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */ ARM_FEATURE_CBAR, /* has cp15 CBAR */ ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ ARM_FEATURE_EL2, /* has EL2 Virtualization support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ - ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */ ARM_FEATURE_PMU, /* has PMU support */ ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ - ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */ - ARM_FEATURE_SVE, /* has Scalable Vector Extension */ - ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */ - ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */ - ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */ - ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */ - ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ - ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ }; @@ -3148,4 +3154,157 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* + * 32-bit feature tests via id registers. 
+ */ +static inline bool isar_feature_thumb_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; +} + +static inline bool isar_feature_arm_div(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; +} + +static inline bool isar_feature_jazelle(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; +} + +static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; +} + +static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; +} + +static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; +} + +static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; +} + +static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; +} + +static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; +} + +static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; +} + +static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; +} + +static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) +{ + /* + * This is a placeholder for use by VCMA until the rest of + * the ARMv8.2-FP16 extension is implemented for aa32 mode. + * At which point we can properly set and check MVFR1.FPHP. + */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +/* + * 64-bit feature tests via id registers. 
+ */ +static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; +} + +static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; +} + +static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; +} + +static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; +} + +static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; +} + +static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; +} + +static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; +} + +static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; +} + +static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; +} + +static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; +} + +static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; +} + +static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; +} + +static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; +} + +static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) +{ + /* We always set the AdvSIMD and FP fields identically wrt FP16. */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; +} + +/* + * Forward to the above feature tests given an ARMCPU pointer. 
+ */ +#define cpu_isar_feature(name, cpu) \ + ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) + #endif diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 44fdf0f6fa..873f059bf2 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -109,11 +109,6 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -121,9 +116,9 @@ static void aarch64_a57_initfn(Object *obj) cpu->midr = 0x411fd070; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -134,18 +129,18 @@ static void aarch64_a57_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; cpu->pmceid0 = 0x00000000; cpu->pmceid1 = 0x00000000; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001124; cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -170,11 +165,6 @@ static void aarch64_a53_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); @@ -182,9 +172,9 @@ static void aarch64_a53_initfn(Object *obj) cpu->midr = 0x410fd034; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034070; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -195,16 +185,16 @@ static void aarch64_a53_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_isar6 = 0; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 
0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -229,20 +219,15 @@ static void aarch64_a72_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_V8_AES); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); - set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); - set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x410fd083; cpu->revidr = 0x00000000; cpu->reset_fpsid = 0x41034080; - cpu->mvfr0 = 0x10110222; - cpu->mvfr1 = 0x12111111; - cpu->mvfr2 = 0x00000043; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; cpu->id_pfr0 = 0x00000131; @@ -253,17 +238,17 @@ static void aarch64_a72_initfn(Object *obj) cpu->id_mmfr1 = 0x40000000; cpu->id_mmfr2 = 0x01260000; cpu->id_mmfr3 = 0x02102211; - cpu->id_isar0 = 0x02101110; - cpu->id_isar1 = 0x13112111; - cpu->id_isar2 = 0x21232042; - cpu->id_isar3 = 0x01112131; - cpu->id_isar4 = 0x00011142; - cpu->id_isar5 = 0x00011121; - cpu->id_aa64pfr0 = 0x00002222; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; cpu->id_aa64dfr0 = 0x10305106; cpu->pmceid0 = 0x00000000; cpu->pmceid1 = 0x00000000; - cpu->id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar0 = 0x00011120; cpu->id_aa64mmfr0 = 0x00001124; cpu->dbgdidr = 0x3516d000; cpu->clidr = 0x0a200023; @@ -312,24 +297,55 @@ static void aarch64_max_initfn(Object *obj) if (kvm_enabled()) { kvm_arm_set_cpu_features_from_host(cpu); } else { + uint64_t t; + uint32_t u; aarch64_a57_initfn(obj); -#ifdef CONFIG_USER_ONLY - /* We don't set these in system emulation mode for the moment, - * since we don't correctly set the ID registers to advertise them, - * and in some cases they're only available in AArch64 and not AArch32, - * whereas the architecture requires them to be present in both if - * present in either. 
+ + t = cpu->isar.id_aa64isar0; + t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ + t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); + t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); + cpu->isar.id_aa64isar0 = t; + + t = cpu->isar.id_aa64isar1; + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); + cpu->isar.id_aa64isar1 = t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); + t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); + cpu->isar.id_aa64pfr0 = t; + + /* Replicate the same data to the 32-bit id registers. */ + u = cpu->isar.id_isar5; + u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ + u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); + u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); + u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); + u = FIELD_DP32(u, ID_ISAR5, RDM, 1); + u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); + cpu->isar.id_isar5 = u; + + u = cpu->isar.id_isar6; + u = FIELD_DP32(u, ID_ISAR6, DP, 1); + cpu->isar.id_isar6 = u; + + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, + * so do not set MVFR1.FPHP. Strictly speaking this is not legal, + * but it is also not legal to enable SVE without support for FP16, + * and enabling SVE in system mode is more useful in the short term. */ - set_feature(&cpu->env, ARM_FEATURE_V8_SHA512); - set_feature(&cpu->env, ARM_FEATURE_V8_SHA3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM3); - set_feature(&cpu->env, ARM_FEATURE_V8_SM4); - set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS); - set_feature(&cpu->env, ARM_FEATURE_V8_RDM); - set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); - set_feature(&cpu->env, ARM_FEATURE_V8_FP16); - set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); - set_feature(&cpu->env, ARM_FEATURE_SVE); + +#ifdef CONFIG_USER_ONLY /* For usermode -cpu max we can use a larger and more efficient DCZ * blocksize since we don't have to follow what the hardware does. */ diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 7f6ad3000b..61799d20e1 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -30,6 +30,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" #include "fpu/softfloat.h" #include <zlib.h> /* For crc32 */ @@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Returns 0 on success; 1 otherwise. 
*/ -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) { - Int128 oldv, cmpv, newv; + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; bool success; - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; - #ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - helper_retaddr = 0; + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } + helper_retaddr = 0; #else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } -#endif + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); } +#endif return !success; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); -} - uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); -} - -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) -{ Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); bool success; + int mem_idx; + TCGMemOpIdx oi; - /* high and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); + assert(HAVE_CMPXCHG128); - 
if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - helper_retaddr = 0; -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - } + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); return !success; } uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; + bool success; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. 
*/ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + + return !success; } uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) + uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); + Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + bool success; + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + newv = int128_make128(new_hi, new_lo); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + + success = int128_eq(oldv, cmpv); + return !success; } /* Writes back the old data into Rs. */ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs] = int128_getlo(oldv); env->xregs[rs + 1] = int128_gethi(oldv); -#endif } void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_hi, uint64_t new_lo) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs + 1] = int128_getlo(oldv); env->xregs[rs] = int128_gethi(oldv); -#endif } /* diff --git a/target/arm/helper.c b/target/arm/helper.c index e3946562aa..0ea95b0815 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -56,6 +56,8 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address, V8M_SAttributes *sattrs); 
#endif +static void switch_mode(CPUARMState *env, int mode); + static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) { int nregs; @@ -552,12 +554,61 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +/* IS variants of TLB operations must affect all cores */ +static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +/* + * Non-IS variants of TLB operations are upgraded to + * IS versions if we are at NS EL1 and HCR_EL2.FB is set to + * force broadcast of these operations. + */ +static bool tlb_force_broadcast(CPUARMState *env) +{ + return (env->cp15.hcr_el2 & HCR_FB) && + arm_current_el(env) == 1 && arm_is_secure_below_el3(env); +} + static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate all (TLBIALL) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbiall_is_write(env, NULL, value); + return; + } + tlb_flush(CPU(cpu)); } @@ -567,6 +618,11 @@ static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbimva_is_write(env, NULL, value); + return; + } + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } @@ -576,6 +632,11 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate by ASID (TLBIASID) */ ARMCPU *cpu = arm_env_get_cpu(env); + if (tlb_force_broadcast(env)) { + tlbiasid_is_write(env, NULL, value); + return; + } + tlb_flush(CPU(cpu)); } @@ -585,40 +646,12 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); -} - -/* IS variants of TLB operations must affect all cores */ -static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); -} - -static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); + if (tlb_force_broadcast(env)) { + tlbimvaa_is_write(env, NULL, value); + return; + } - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1296,12 +1329,26 @@ static uint64_t 
isr_read(CPUARMState *env, const ARMCPRegInfo *ri) CPUState *cs = ENV_GET_CPU(env); uint64_t ret = 0; - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - ret |= CPSR_I; + if (arm_hcr_el2_imo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= CPSR_I; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + ret |= CPSR_I; + } } - if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { - ret |= CPSR_F; + + if (arm_hcr_el2_fmo(env)) { + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= CPSR_F; + } + } else { + if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + ret |= CPSR_F; + } } + /* External aborts are not possible in QEMU so A bit is always clear */ return ret; } @@ -2270,13 +2317,15 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * * The Non-secure TTBCR.EAE bit is set to 1 * * The implementation includes EL2, and the value of HCR.VM is 1 * + * (Note that HCR.DC makes HCR.VM behave as if it is 1.) + * * ATS1Hx always uses the 64bit format (not supported yet). */ format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); if (arm_feature(env, ARM_FEATURE_EL2)) { if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { - format64 |= env->cp15.hcr_el2 & HCR_VM; + format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); } else { format64 |= arm_current_el(env) == 2; } @@ -2709,12 +2758,10 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* 64 bit accesses to the TTBRs can change the ASID and so we - * must flush the TLB. - */ - if (cpreg_field_is_64bit(ri)) { + /* If the ASID changes (with a 64-bit write), we must flush the TLB. */ + if (cpreg_field_is_64bit(ri) && + extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { ARMCPU *cpu = arm_env_get_cpu(env); - tlb_flush(CPU(cpu)); } raw_write(env, ri, value); @@ -3083,22 +3130,6 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env, * Page D4-1736 (DDI0487A.b) */ -static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = ENV_GET_CPU(env); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3116,6 +3147,27 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vmalle1_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_by_mmuidx(cs, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3205,29 +3257,6 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3); } -static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by VA, EL1&0 (AArch64 version). - * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. 
- */ - ARMCPU *cpu = arm_env_get_cpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3275,6 +3304,34 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, EL1&0 (AArch64 version). + * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vae1is_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3872,6 +3929,7 @@ static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = { static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = ENV_GET_CPU(env); uint64_t valid_mask = HCR_MASK; if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -3890,6 +3948,28 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear RES0 bits. */ value &= valid_mask; + /* + * VI and VF are kept in cs->interrupt_request. Modifying that + * requires that we have the iothread lock, which is done by + * marking the reginfo structs as ARM_CP_IO. + * Note that if a write to HCR pends a VIRQ or VFIQ it is never + * possible for it to be taken immediately, because VIRQ and + * VFIQ are masked unless running at EL0 or EL1, and HCR + * can only be written at EL2. 
+ */ + g_assert(qemu_mutex_iothread_locked()); + if (value & HCR_VI) { + cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + } + if (value & HCR_VF) { + cs->interrupt_request |= CPU_INTERRUPT_VFIQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ; + } + value &= ~(HCR_VI | HCR_VF); + /* These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups @@ -3917,16 +3997,32 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, hcr_write(env, NULL, value); } +static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* The VI and VF bits live in cs->interrupt_request */ + uint64_t ret = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF); + CPUState *cs = ENV_GET_CPU(env); + + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= HCR_VI; + } + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= HCR_VF; + } + return ret; +} + static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .readfn = hcr_read }, { .name = "HCR", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_writelow }, + .writefn = hcr_writelow, .readfn = hcr_read }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, @@ -4163,7 +4259,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { static const ARMCPRegInfo el2_v8_cp_reginfo[] = { { .name = "HCR2", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2), @@ -4214,7 +4310,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.mvbar) }, { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .access = PL3_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) }, { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, @@ -4873,7 +4969,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); - uint64_t pfr0 = cpu->id_aa64pfr0; + uint64_t pfr0 = cpu->isar.id_aa64pfr0; if (env->gicv3state) { pfr0 |= 1 << 24; @@ -4940,27 +5036,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar0 }, + .resetvalue = cpu->isar.id_isar0 }, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar1 }, + .resetvalue = cpu->isar.id_isar1 }, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar2 }, + 
.resetvalue = cpu->isar.id_isar2 }, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar3 }, + .resetvalue = cpu->isar.id_isar3 }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar4 }, + .resetvalue = cpu->isar.id_isar4 }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar5 }, + .resetvalue = cpu->isar.id_isar5 }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -4968,7 +5064,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar6 }, + .resetvalue = cpu->isar.id_isar6 }, REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v6_idregs); @@ -5039,7 +5135,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64pfr1}, + .resetvalue = cpu->isar.id_aa64pfr1}, { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5100,11 +5196,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar0 }, + .resetvalue = cpu->isar.id_aa64isar0 }, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar1 }, + .resetvalue = cpu->isar.id_aa64isar1 }, { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5164,15 +5260,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr0 }, + .resetvalue = cpu->isar.mvfr0 }, { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr1 }, + .resetvalue = cpu->isar.mvfr1 }, { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr2 }, + .resetvalue = cpu->isar.mvfr2 }, { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -5618,7 +5714,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &sctlr); } - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); if (arm_feature(env, ARM_FEATURE_EL2)) { define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); @@ -6208,7 +6304,17 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t 
mask, mask |= CPSR_IL; val |= CPSR_IL; } + qemu_log_mask(LOG_GUEST_ERROR, + "Illegal AArch32 mode switch attempt from %s to %s\n", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val)); } else { + qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n", + write_type == CPSRWriteExceptionReturn ? + "Exception return from AArch32" : + "AArch32 mode switch from", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val), env->regs[15]); switch_mode(env, val & CPSR_M); } } @@ -6306,7 +6412,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) return 0; } -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { ARMCPU *cpu = arm_env_get_cpu(env); @@ -6328,7 +6434,7 @@ void aarch64_sync_64_to_32(CPUARMState *env) #else -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { int old_mode; int i; @@ -8194,6 +8300,19 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) } if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) { + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* + * QEMU syndrome values are v8-style. v7 has the IL bit + * UNK/SBZP for "field not valid" cases, where v8 uses RES1. + * If this is a v7 CPU, squash the IL bit in those cases. + */ + if (cs->exception_index == EXCP_PREFETCH_ABORT || + (cs->exception_index == EXCP_DATA_ABORT && + !(env->exception.syndrome & ARM_EL_ISV)) || + syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) { + env->exception.syndrome &= ~ARM_EL_IL; + } + } env->cp15.esr_el[2] = env->exception.syndrome; } @@ -8228,7 +8347,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) uint32_t moe; /* If this is a debug exception we must update the DBGDSCR.MOE bits */ - switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) { + switch (syn_get_ec(env->exception.syndrome)) { case EC_BREAKPOINT: case EC_BREAKPOINT_SAME_EL: moe = 1; @@ -8425,6 +8544,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: + if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + /* + * QEMU internal FP/SIMD syndromes from AArch32 include the + * TA and coproc fields which are only exposed if the exception + * is taken to AArch32 Hyp mode. Mask them out to get a valid + * AArch64 format syndrome. 
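A minimal sketch (not part of the patch) of the masking performed just below: the HSR-format value built by syn_fp_access_trap()/syn_simd_access_trap() carries the AArch32-only TA and coproc fields in bits [19:0], and taking the exception to AArch64 clears them while leaving the fields above bit 19 (EC, IL, CV, COND) alone. The helper name here is made up for illustration.

#include <stdint.h>

/* Clear the AArch32-only low 20 bits (TA, coproc, ...) so the value is a
 * valid AArch64 ESR_ELx syndrome; EC [31:26], IL [25], CV and COND survive. */
static inline uint32_t fp_trap_syndrome_for_aarch64(uint32_t hsr_syndrome)
{
    return hsr_syndrome & ~((1u << 20) - 1);
}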
+ */ + env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + } env->cp15.esr_el[new_el] = env->exception.syndrome; break; case EXCP_IRQ: @@ -8568,7 +8696,7 @@ void arm_cpu_do_interrupt(CPUState *cs) if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", - env->exception.syndrome >> ARM_EL_EC_SHIFT, + syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -8665,7 +8793,8 @@ static inline bool regime_translation_disabled(CPUARMState *env, } if (mmu_idx == ARMMMUIdx_S2NS) { - return (env->cp15.hcr_el2 & HCR_VM) == 0; + /* HCR.DC means HCR.VM behaves as 1 */ + return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0; } if (env->cp15.hcr_el2 & HCR_TGE) { @@ -8675,6 +8804,12 @@ static inline bool regime_translation_disabled(CPUARMState *env, } } + if ((env->cp15.hcr_el2 & HCR_DC) && + (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) { + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ + return true; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } @@ -9026,9 +9161,20 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, hwaddr s2pa; int s2prot; int ret; + ARMCacheAttrs cacheattrs = {}; + ARMCacheAttrs *pcacheattrs = NULL; + + if (env->cp15.hcr_el2 & HCR_PTW) { + /* + * PTW means we must fault if this S1 walk touches S2 Device + * memory; otherwise we don't care about the attributes and can + * save the S2 translation the effort of computing them. + */ + pcacheattrs = &cacheattrs; + } ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa, - &txattrs, &s2prot, &s2size, fi, NULL); + &txattrs, &s2prot, &s2size, fi, pcacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -9036,6 +9182,14 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } + if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { + /* Access was to Device memory: generate Permission fault */ + fi->type = ARMFault_Permission; + fi->s2addr = addr; + fi->stage2 = true; + fi->s1ptw = true; + return ~0; + } addr = s2pa; } return addr; @@ -10655,6 +10809,16 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address, /* Combine the S1 and S2 cache attributes, if needed */ if (!ret && cacheattrs != NULL) { + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + */ + cacheattrs->attrs = 0xff; + cacheattrs->shareability = 0; + } *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); } @@ -11612,7 +11776,7 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) uint32_t changed; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. 
*/ - if (!arm_feature(env, ARM_FEATURE_V8_FP16)) { + if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { val &= ~FPCR_FZ16; } @@ -12671,13 +12835,15 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, uint32_t flags; if (is_a64(env)) { + ARMCPU *cpu = arm_env_get_cpu(env); + *pc = env->pc; flags = ARM_TBFLAG_AARCH64_STATE_MASK; /* Get control bits for tagged addresses */ flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { int sve_el = sve_exception_el(env, current_el); uint32_t zcr_len; @@ -12801,11 +12967,12 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el, bool el0_a64) { + ARMCPU *cpu = arm_env_get_cpu(env); int old_len, new_len; bool old_a64, new_a64; /* Nothing to do if no SVE. */ - if (!arm_feature(env, ARM_FEATURE_SVE)) { + if (!cpu_isar_feature(aa64_sve, cpu)) { return; } diff --git a/target/arm/internals.h b/target/arm/internals.h index a4fc709bcc..6c2bb2deeb 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -145,7 +145,6 @@ static inline int bank_number(int mode) g_assert_not_reached(); } -void switch_mode(CPUARMState *, int); void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); @@ -279,14 +278,19 @@ enum arm_exception_class { #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT) +static inline uint32_t syn_get_ec(uint32_t syn) +{ + return syn >> ARM_EL_EC_SHIFT; +} + /* Utility functions for constructing various kinds of syndrome value. * Note that in general we follow the AArch64 syndrome values; in a * few cases the value in HSR for exceptions taken to AArch32 Hyp - * mode differs slightly, so if we ever implemented Hyp mode then the - * syndrome value would need some massaging on exception entry. - * (One example of this is that AArch64 defaults to IL bit set for - * exceptions which don't specifically indicate information about the - * trapping instruction, whereas AArch32 defaults to IL bit clear.) + * mode differs slightly, and we fix this up when populating HSR in + * arm_cpu_do_interrupt_aarch32_hyp(). + * The exception is FP/SIMD access traps -- these report extra information + * when taking an exception to AArch32. For those we include the extra coproc + * and TA fields, and mask them out when taking the exception to AArch64. */ static inline uint32_t syn_uncategorized(void) { @@ -386,9 +390,18 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit) { + /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */ + return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) + | (is_16bit ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | 0xa; +} + +static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) +{ + /* AArch32 SIMD trap: TA == 1 coproc == 0 */ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) | (is_16bit ? 
0 : ARM_EL_IL) - | (cv << 24) | (cond << 20); + | (cv << 24) | (cond << 20) | (1 << 5); } static inline uint32_t syn_sve_access_trap(void) @@ -840,4 +853,22 @@ static inline uint32_t v7m_sp_limit(CPUARMState *env) } } +/** + * aarch32_mode_name(): Return name of the AArch32 CPU mode + * @psr: Program Status Register indicating CPU mode + * + * Returns, for debug logging purposes, a printable representation + * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by + * the low bits of the specified PSR. + */ +static inline const char *aarch32_mode_name(uint32_t psr) +{ + static const char cpu_mode_names[16][4] = { + "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", + "???", "???", "hyp", "und", "???", "???", "???", "sys" + }; + + return cpu_mode_names[psr & 0xf]; +} + #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 54ef5f711b..09a86e2820 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -34,6 +34,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { }; static bool cap_has_mp_state; +static bool cap_has_inject_serror_esr; static ARMHostCPUFeatures arm_host_cpu_features; @@ -48,6 +49,12 @@ int kvm_arm_vcpu_init(CPUState *cs) return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); } +void kvm_arm_init_serror_injection(CPUState *cs) +{ + cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state, + KVM_CAP_ARM_INJECT_SERROR_ESR); +} + bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, int *fdarray, struct kvm_vcpu_init *init) @@ -522,6 +529,59 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) return 0; } +int kvm_put_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + events.exception.serror_pending = env->serror.pending; + + /* Inject SError to guest with specified syndrome if host kernel + * supports it, otherwise inject SError without syndrome. + */ + if (cap_has_inject_serror_esr) { + events.exception.serror_has_esr = env->serror.has_esr; + events.exception.serror_esr = env->serror.esr; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to put vcpu events"); + } + + return ret; +} + +int kvm_get_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to get vcpu events"); + return ret; + } + + env->serror.pending = events.exception.serror_pending; + env->serror.has_esr = events.exception.serror_has_esr; + env->serror.esr = events.exception.serror_esr; + + return 0; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 4e91c11796..0f1e94c7b5 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -217,6 +217,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK; + /* Check whether userspace can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -358,6 +361,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + /* Note that we do not call write_cpustate_to_list() * here, so we are only writing the tuple list back to * KVM. 
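For reference, a compilable sketch of the per-vCPU SError state that kvm_put_vcpu_events()/kvm_get_vcpu_events() shuttle to and from the kernel. The field names and widths are inferred from the KVM and vmstate code in this diff; the actual declaration lives in cpu.h (part of the series but not shown here), and the type name below is invented for illustration.

#include <stdint.h>

typedef struct ARMSErrorState {
    uint8_t pending;   /* an SError is pending injection into the guest */
    uint8_t has_esr;   /* 'esr' holds a user-specified syndrome */
    uint64_t esr;      /* syndrome, honoured only when the host advertises
                        * KVM_CAP_ARM_INJECT_SERROR_ESR */
} ARMSErrorState;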
This is safe because nothing can change the @@ -445,6 +453,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpscr(env, fpscr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e0b8246283..5de8ff0ac5 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -546,6 +546,9 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_arm_init_debug(cs); + /* Check whether user space can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -727,6 +730,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } @@ -863,6 +871,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpcr(env, fpr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } @@ -920,7 +933,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) { - int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT; + int hsr_ec = syn_get_ec(debug_exit->hsr); ARMCPU *cpu = ARM_CPU(cs); CPUClass *cc = CPU_GET_CLASS(cs); CPUARMState *env = &cpu->env; diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 5948e8b560..21c0129da2 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -121,6 +121,30 @@ bool write_kvmstate_to_list(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); +/** + * kvm_arm_init_serror_injection: + * @cs: CPUState + * + * Check whether KVM can set guest SError syndrome. + */ +void kvm_arm_init_serror_injection(CPUState *cs); + +/** + * kvm_get_vcpu_events: + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. + */ +int kvm_get_vcpu_events(ARMCPU *cpu); + +/** + * kvm_put_vcpu_events: + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. + */ +int kvm_put_vcpu_events(ARMCPU *cpu); + #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: diff --git a/target/arm/machine.c b/target/arm/machine.c index ff4ec22bf7..239fe4e84d 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -131,9 +131,8 @@ static const VMStateDescription vmstate_iwmmxt = { static bool sve_needed(void *opaque) { ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - return arm_feature(env, ARM_FEATURE_SVE); + return cpu_isar_feature(aa64_sve, cpu); } /* The first two words of each Zreg is stored in VFP state. 
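sve_needed() above now keys off cpu_isar_feature(aa64_sve, cpu) rather than an ARM_FEATURE_* flag. A self-contained sketch of what such an ID-register test boils down to, assuming the real helper in cpu.h reads the architectural ID_AA64PFR0_EL1.SVE field (bits [35:32]); the function name is illustrative only.

#include <stdbool.h>
#include <stdint.h>

static inline bool sve_supported_sketch(uint64_t id_aa64pfr0)
{
    return ((id_aa64pfr0 >> 32) & 0xf) != 0;   /* ID_AA64PFR0_EL1.SVE != 0 */
}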
*/ @@ -172,6 +171,27 @@ static const VMStateDescription vmstate_sve = { }; #endif /* AARCH64 */ +static bool serror_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return env->serror.pending != 0; +} + +static const VMStateDescription vmstate_serror = { + .name = "cpu/serror", + .version_id = 1, + .minimum_version_id = 1, + .needed = serror_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.serror.pending, ARMCPU), + VMSTATE_UINT8(env.serror.has_esr, ARMCPU), + VMSTATE_UINT64(env.serror.esr, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -726,6 +746,7 @@ const VMStateDescription vmstate_arm_cpu = { #ifdef TARGET_AARCH64 &vmstate_sve, #endif + &vmstate_serror, NULL } }; diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index d915579712..90741f6331 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -42,7 +42,7 @@ void raise_exception(CPUARMState *env, uint32_t excp, * (see DDI0478C.a D1.10.4) */ target_el = 2; - if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) { + if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) { syndrome = syn_uncategorized(); } } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 8a24278d79..88195ab949 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -37,6 +37,7 @@ #include "trace-tcg.h" #include "translate-a64.h" +#include "qemu/atomic128.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; @@ -173,7 +174,7 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f, cpu_fprintf(f, " FPCR=%08x FPSR=%08x\n", vfp_get_fpcr(env), vfp_get_fpsr(env)); - if (arm_feature(env, ARM_FEATURE_SVE) && sve_exception_el(env, el) == 0) { + if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { int j, zcr_len = sve_zcr_len_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { @@ -1200,25 +1201,23 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop = s->be_data + size; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_tmp); @@ -2086,26 +2085,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, get_mem_index(s), MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (s->be_data == MO_LE) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!HAVE_CMPXCHG128) { + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; + } else if (s->be_data == MO_LE) { gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), 
cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); - } - } else { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, @@ -2175,14 +2175,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, } tcg_temp_free_i64(cmp); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - TCGv_i32 tcg_rs = tcg_const_i32(rs); - - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + if (HAVE_CMPXCHG128) { + TCGv_i32 tcg_rs = tcg_const_i32(rs); + if (s->be_data == MO_LE) { + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + } else { + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + } + tcg_temp_free_i32(tcg_rs); } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(tcg_rs); } else { TCGv_i64 d1 = tcg_temp_new_i64(); TCGv_i64 d2 = tcg_temp_new_i64(); @@ -2322,7 +2326,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2344,7 +2348,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } if (rt2 == 31 && ((rt | rs) & 1) == 0 - && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + && dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2355,7 +2359,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) case 0xb: /* CASL */ case 0xe: /* CASA */ case 0xf: /* CASAL */ - if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { gen_compare_and_swap(s, rs, rt, rn, size); return; } @@ -2894,11 +2898,10 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int rs = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int o3_opc = extract32(insn, 12, 4); - int feature = ARM_FEATURE_V8_ATOMICS; TCGv_i64 tcg_rn, tcg_rs; AtomicThreeOpFn *fn; - if (is_vector) { + if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); return; } @@ -2934,10 +2937,6 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { - unallocated_encoding(s); - return; - } if (rn == 31) { gen_check_sp_alignment(s); @@ -3017,10 +3016,11 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_store = !extract32(insn, 22, 1); bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; + TCGMemOp endian = s->be_data; - int ebytes = 1 << size; - int elements = (is_q ? 
128 : 64) / (8 << size); + int ebytes; /* bytes per element */ + int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ int r; @@ -3079,39 +3079,55 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (selem == 1 && endian == MO_LE) { + size = 3; + } + ebytes = 1 << size; + elements = (is_q ? 16 : 8) / ebytes; + tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { - int tt = (rt + r) % 32; int xs; for (xs = 0; xs < selem; xs++) { + int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, tcg_addr, size); + do_vec_st(s, tt, e, tcg_addr, size, endian); } else { - do_vec_ld(s, tt, e, tcg_addr, size); - - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. We optimize by noticing that we only - * need to do this the first time we touch a register. - */ - if (e == 0 && (r == 0 || xs == selem - 1)) { - clear_vec_high(s, is_q, tt); - } + do_vec_ld(s, tt, e, tcg_addr, size, endian); } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); - tt = (tt + 1) % 32; + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); } } } + if (!is_store) { + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). + * For quad operations, we might still need to zero the + * high bits of SVE. 
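A worked example (sketch only) of the element-size promotion introduced above: a little-endian LD1 of sixteen byte elements is now issued as two 8-byte accesses rather than sixteen 1-byte ones.

#include <assert.h>

static void ld1_promotion_example(void)
{
    int size = 0, selem = 1, is_q = 1;      /* e.g. LD1 {v0.16b}, [x0] */
    int endian_is_le = 1;                   /* bytes are endian-agnostic */

    if (selem == 1 && endian_is_le) {
        size = 3;                           /* promote to 64-bit accesses */
    }
    int ebytes = 1 << size;                 /* 8 bytes per access */
    int elements = (is_q ? 16 : 8) / ebytes;
    assert(ebytes == 8 && elements == 2);   /* two loads cover the Q reg */
}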
+ */ + for (r = 0; r < rpt * selem; r++) { + int tt = (rt + r) % 32; + clear_vec_high(s, is_q, tt); + } + } + if (is_postidx) { int rm = extract32(insn, 16, 5); if (rm == 31) { @@ -3120,6 +3136,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -3162,7 +3179,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) bool replicate = false; int index = is_q << 3 | S << 2 | size; int ebytes, xs; - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; switch (scale) { case 3: @@ -3215,49 +3232,28 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(ebytes); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ - uint64_t mulconst; TCGv_i64 tcg_tmp = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), s->be_data + scale); - switch (scale) { - case 0: - mulconst = 0x0101010101010101ULL; - break; - case 1: - mulconst = 0x0001000100010001ULL; - break; - case 2: - mulconst = 0x0000000100000001ULL; - break; - case 3: - mulconst = 0; - break; - default: - g_assert_not_reached(); - } - if (mulconst) { - tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); - } - write_vec_element(s, tcg_tmp, rt, 0, MO_64); - if (is_q) { - write_vec_element(s, tcg_tmp, rt, 1, MO_64); - } + tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), + (is_q + 1) * 8, vec_full_reg_size(s), + tcg_tmp); tcg_temp_free_i64(tcg_tmp); - clear_vec_high(s, is_q, rt); } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, tcg_addr, scale); + do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data); } else { - do_vec_st(s, rt, index, tcg_addr, scale); + do_vec_st(s, rt, index, tcg_addr, scale, s->be_data); } } - tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); rt = (rt + 1) % 32; } @@ -3269,6 +3265,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ebytes); tcg_temp_free_i64(tcg_addr); } @@ -4568,7 +4565,7 @@ static void handle_crc32(DisasContext *s, TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!arm_dc_feature(s, ARM_FEATURE_CRC) + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || (sf == 0 && sz == 3)) { unallocated_encoding(s); @@ -4810,7 +4807,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4861,7 +4858,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) break; case 3: size = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -4927,7 +4924,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5260,7 +5257,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) handle_fp_1src_double(s, opcode, rd, rn); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5475,7 +5472,7 @@ static void 
disas_fp_2src(DisasContext *s, uint32_t insn) handle_fp_2src_double(s, opcode, rd, rn, rm); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5633,7 +5630,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn) handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); break; case 3: - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -5703,7 +5700,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) break; case 3: sz = MO_16; - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -5928,7 +5925,7 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6058,7 +6055,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) break; case 0x6: /* 16-bit float, 32-bit int */ case 0xe: /* 16-bit float, 64-bit int */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6085,7 +6082,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) case 1: /* float64 */ break; case 3: /* float16 */ - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (dc_isar_feature(aa64_fp16, s)) { break; } /* fallthru */ @@ -6522,7 +6519,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) */ is_min = extract32(size, 1, 1); is_fp = true; - if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!is_u && dc_isar_feature(aa64_fp16, s)) { size = 1; } else if (!is_u || !is_q || extract32(size, 0, 1)) { unallocated_encoding(s); @@ -6918,7 +6915,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { /* Check for FMOV (vector, immediate) - half-precision */ - if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) { + if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { unallocated_encoding(s); return; } @@ -7085,7 +7082,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ if (!u) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } else { @@ -7730,7 +7727,7 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -7775,7 +7772,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, size = MO_32; } else if (immh & 0x2) { size = MO_16; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -8040,28 +8037,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* CMTST : test is "if (X & Y != 0)". 
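The gen_cmtst_i32/_i64/_vec helpers whose removal begins here (bodies deleted just below) appear to move to shared vector-op code elsewhere in the series, not visible in this hunk. For reference, the per-element test they implement:

#include <stdint.h>

/* CMTST: each result element is all-ones if (a & b) != 0, else zero. */
static inline uint32_t cmtst32(uint32_t a, uint32_t b)
{
    return (a & b) ? UINT32_MAX : 0;
}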
*/ -static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_and_i32(d, a, b); - tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i32(d, d); -} - -static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_and_i64(d, a, b); - tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i64(d, d); -} - -static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_and_vec(vece, d, a, b); - tcg_gen_dupi_vec(vece, a, 0); - tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); -} - static void handle_3same_64(DisasContext *s, int opcode, bool u, TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) { @@ -8539,7 +8514,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, return; } - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); } @@ -8612,7 +8587,7 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, bool u = extract32(insn, 29, 1); TCGv_i32 ele1, ele2, ele3; TCGv_i64 res; - int feature; + bool feature; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -8621,13 +8596,13 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -9401,191 +9376,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } } -static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_sari_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_sari_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_shri_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, 
mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); -} - -static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); -} - -static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (2ull << ((8 << vece) - 1)) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh)); - tcg_gen_shri_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_sari_vec, - .vece = MO_64 }, - }; - static const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64, }, - }; - static const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opc = INDEX_op_shri_vec, - .vece = MO_64 }, - }; - int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; @@ -9681,85 +9475,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, clear_vec_high(s, is_q, rd); } -static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - 
tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); -} - -static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); -} - -static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - uint64_t mask = (1ull << sh) - 1; - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, mask); - tcg_gen_shli_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); -} - /* SHL/SLI - Vector shift left */ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, int immh, int immb, int opcode, int rn, int rd) { - static const GVecGen2i shi_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .opc = INDEX_op_shli_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = immhb - (8 << size); @@ -9779,7 +9498,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]); + gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10356,7 +10075,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } if (size == 3) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa64_pmull, s)) { unallocated_encoding(s); return; } @@ -10401,70 +10120,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rm); - tcg_gen_and_i64(rn, rn, rd); - tcg_gen_xor_i64(rd, rm, rn); -} - -static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_and_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) -{ - tcg_gen_xor_i64(rn, rn, rd); - tcg_gen_andc_i64(rn, rn, rm); - tcg_gen_xor_i64(rd, rd, rn); -} - -static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rm); - tcg_gen_and_vec(vece, rn, rn, rd); - tcg_gen_xor_vec(vece, rd, rm, rn); -} - -static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_and_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - -static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) -{ - tcg_gen_xor_vec(vece, rn, rn, rd); - tcg_gen_andc_vec(vece, rn, rn, rm); - tcg_gen_xor_vec(vece, rd, rd, rn); -} - /* Logic op (opcode == 3) subgroup of C3.6.16. 
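Likewise, the open-coded BSL/BIT/BIF helpers (removed above) and their GVecGen3 tables (removed just below) are dropped here, presumably in favour of shared definitions elsewhere in the series. Their bitwise semantics, for reference, match the deleted i64 helpers:

#include <stdint.h>

static inline uint64_t bsl64(uint64_t d, uint64_t n, uint64_t m)
{
    return (d & n) | (~d & m);      /* d selects: n where set, m where clear */
}

static inline uint64_t bit64(uint64_t d, uint64_t n, uint64_t m)
{
    return (d & ~m) | (n & m);      /* insert n into d where m is set */
}

static inline uint64_t bif64(uint64_t d, uint64_t n, uint64_t m)
{
    return (d & m) | (n & ~m);      /* insert n into d where m is clear */
}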
*/ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { - static const GVecGen3 bsl_op = { - .fni8 = gen_bsl_i64, - .fniv = gen_bsl_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bit_op = { - .fni8 = gen_bit_i64, - .fniv = gen_bit_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - static const GVecGen3 bif_op = { - .fni8 = gen_bif_i64, - .fniv = gen_bif_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true - }; - int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); int rm = extract32(insn, 16, 5); @@ -10736,131 +10394,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) } } -static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_add_u8(d, d, a); -} - -static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_add_u16(d, d, a); -} - -static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_add_i32(d, d, a); -} - -static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_add_i64(d, d, a); -} - -static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_sub_u8(d, d, a); -} - -static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_sub_u16(d, d, a); -} - -static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_sub_i32(d, d, a); -} - -static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_sub_i64(d, d, a); -} - -static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, d, d, a); -} - /* Integer op subgroup of C3.6.16. 
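The MLA/MLS helpers removed above get the same treatment; per element they are plain multiply-accumulate and multiply-subtract, sketched here for reference:

#include <stdint.h>

static inline uint32_t mla32(uint32_t d, uint32_t a, uint32_t b)
{
    return d + a * b;    /* MLA */
}

static inline uint32_t mls32(uint32_t d, uint32_t a, uint32_t b)
{
    return d - a * b;    /* MLS */
}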
*/ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) { - static const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .vece = MO_64 }, - }; - static const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - static const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .vece = MO_64 }, - }; - int is_q = extract32(insn, 30, 1); int u = extract32(insn, 29, 1); int size = extract32(insn, 22, 2); @@ -11220,7 +10756,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) TCGv_ptr fpst; bool pairwise = false; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -11408,7 +10944,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) int size = extract32(insn, 22, 2); bool u = extract32(insn, 29, 1); bool is_q = extract32(insn, 30, 1); - int feature, rot; + bool feature; + int rot; switch (u * 16 + opcode) { case 0x10: /* SQRDMLAH (vector) */ @@ -11417,7 +10954,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_RDM; + feature = dc_isar_feature(aa64_rdm, s); break; case 0x02: /* SDOT (vector) */ case 0x12: /* UDOT (vector) */ @@ -11425,7 +10962,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_DOTPROD; + feature = dc_isar_feature(aa64_dp, s); break; case 0x18: /* FCMLA, #0 */ case 0x19: /* FCMLA, #90 */ @@ -11434,18 +10971,18 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ if (size == 0 - || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) + || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } - feature = ARM_FEATURE_V8_FCMA; + feature = dc_isar_feature(aa64_fcma, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -12314,7 +11851,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) bool need_fpst = true; int rmode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if 
(!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -12659,14 +12196,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa64_rdm, s)) { unallocated_encoding(s); return; } break; case 0x0e: /* SDOT */ case 0x1e: /* UDOT */ - if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) { unallocated_encoding(s); return; } @@ -12675,7 +12212,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa64_fcma, s)) { unallocated_encoding(s); return; } @@ -12731,7 +12268,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; } - if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); return; } @@ -13202,8 +12739,7 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) TCGv_i32 tcg_decrypt; CryptoThreeOpIntFn *genfn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || size != 0) { + if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); return; } @@ -13260,7 +12796,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); CryptoThreeOpFn *genfn; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - int feature = ARM_FEATURE_V8_SHA256; + bool feature; if (size != 0) { unallocated_encoding(s); @@ -13273,23 +12809,26 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) case 2: /* SHA1M */ case 3: /* SHA1SU0 */ genfn = NULL; - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ genfn = gen_helper_crypto_sha256h; + feature = dc_isar_feature(aa64_sha256, s); break; case 5: /* SHA256H2 */ genfn = gen_helper_crypto_sha256h2; + feature = dc_isar_feature(aa64_sha256, s); break; case 6: /* SHA256SU1 */ genfn = gen_helper_crypto_sha256su1; + feature = dc_isar_feature(aa64_sha256, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13330,7 +12869,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); CryptoTwoOpFn *genfn; - int feature; + bool feature; TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { @@ -13340,15 +12879,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1H */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1h; break; case 1: /* SHA1SU1 */ - feature = ARM_FEATURE_V8_SHA1; + feature = dc_isar_feature(aa64_sha1, s); genfn = gen_helper_crypto_sha1su1; break; case 2: /* SHA256SU0 */ - feature = ARM_FEATURE_V8_SHA256; + feature = dc_isar_feature(aa64_sha256, s); genfn = gen_helper_crypto_sha256su0; break; default: @@ -13356,7 +12895,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13387,40 +12926,40 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = 
extract32(insn, 0, 5); - int feature; + bool feature; CryptoThreeOpFn *genfn; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); genfn = NULL; break; } } else { switch (opcode) { case 0: /* SM3PARTW1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4ekey; break; default: @@ -13429,7 +12968,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) } } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13488,16 +13027,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - int feature; + bool feature; CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4e; break; default: @@ -13505,7 +13044,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13536,22 +13075,22 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) int ra = extract32(insn, 10, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int feature; + bool feature; switch (op0) { case 0: /* EOR3 */ case 1: /* BCAX */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); break; case 2: /* SM3SS1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13638,7 +13177,7 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; int pass; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) { + if (!dc_isar_feature(aa64_sha3, s)) { unallocated_encoding(s); return; } @@ -13684,7 +13223,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; TCGv_i32 tcg_imm2, tcg_opcode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) { + if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); return; } @@ -13792,7 +13331,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); break; case 0x2: - if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) { + if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) { unallocated_encoding(s); } break; @@ -13833,6 +13372,7 @@ static void 
aarch64_tr_init_disas_context(DisasContextBase *dcbase, ARMCPU *arm_cpu = arm_env_get_cpu(env); int bound; + dc->isar = &arm_cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -13896,7 +13436,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) { - tcg_clear_temp_count(); } static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) diff --git a/target/arm/translate.c b/target/arm/translate.c index 1b4bacb522..7c4675ffd8 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -42,7 +42,7 @@ #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) /* currently all emulated v5 cores are also v5TE, so don't bother */ #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) -#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE) +#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s) #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) @@ -72,7 +72,7 @@ static TCGv_i64 cpu_F0d, cpu_F1d; #include "exec/gen-icount.h" -static const char *regnames[] = +static const char * const regnames[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -1585,6 +1585,25 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, TCGMemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
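A worked example (sketch) of the big-endian fixup in neon_element_offset(): within each host-order 8-byte unit, the XOR re-maps the little-endian element index to the byte offset a big-endian host actually uses.

#include <assert.h>

static long element_offset(int element, int size_log2, int host_big_endian)
{
    int element_size = 1 << size_log2;
    long ofs = element * element_size;

    if (host_big_endian && element_size < 8) {
        ofs ^= 8 - element_size;
    }
    return ofs;
}

static void element_offset_examples(void)
{
    assert(element_offset(0, 0, 1) == 7);  /* byte 0 -> offset 7 on BE hosts */
    assert(element_offset(1, 1, 1) == 4);  /* halfword 1 (bits 31:16) -> 4 */
    assert(element_offset(1, 1, 0) == 2);  /* unchanged on LE hosts */
}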
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + static TCGv_i32 neon_load_reg(int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -1592,12 +1611,94 @@ static TCGv_i32 neon_load_reg(int reg, int pass) return tmp; } +static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(var, cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_store_reg(int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(var); } +static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(var, cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static inline void neon_load_reg64(TCGv_i64 var, int reg) { tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); @@ -2974,19 +3075,6 @@ static void gen_vfp_msr(TCGv_i32 tmp) tcg_temp_free_i32(tmp); } -static void gen_neon_dup_u8(TCGv_i32 var, int shift) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_ext8u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 8); - tcg_gen_or_i32(var, var, tmp); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static void gen_neon_dup_low16(TCGv_i32 var) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -3005,28 +3093,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_temp_free_i32(tmp); } -static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) -{ - /* Load a single Neon element and replicate into a 32 bit TCG reg */ - TCGv_i32 tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. 
*/ - abort(); - } - return tmp; -} - static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, uint32_t dp) { @@ -3432,17 +3498,10 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) tmp = load_reg(s, rd); if (insn & (1 << 23)) { /* VDUP */ - if (size == 0) { - gen_neon_dup_u8(tmp, 0); - } else if (size == 1) { - gen_neon_dup_low16(tmp); - } - for (n = 0; n <= pass * 2; n++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rn, n, tmp2); - } - neon_store_reg(rn, n, tmp); + int vec_size = pass ? 16 : 8; + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0), + vec_size, vec_size, tmp); + tcg_temp_free_i32(tmp); } else { /* VMOV */ switch (size) { @@ -4907,17 +4966,17 @@ static struct { int nregs; int interleave; int spacing; -} neon_ls_element_type[11] = { - {4, 4, 1}, - {4, 4, 2}, +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, {4, 1, 1}, - {4, 2, 1}, - {3, 3, 1}, - {3, 3, 2}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, {3, 1, 1}, {1, 1, 1}, - {2, 2, 1}, - {2, 2, 2}, + {1, 2, 1}, + {1, 2, 2}, {2, 1, 1} }; @@ -4933,10 +4992,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) int stride; int size; int reg; - int pass; int load; - int shift; int n; + int vec_size; + int mmu_idx; + TCGMemOp endian; TCGv_i32 addr; TCGv_i32 tmp; TCGv_i32 tmp2; @@ -4948,7 +5008,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -4958,6 +5018,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) rn = (insn >> 16) & 0xf; rm = insn & 0xf; load = (insn & (1 << 21)) != 0; + endian = s->be_data; + mmu_idx = get_mem_index(s); if ((insn & (1 << 23)) == 0) { /* Load store all elements. */ op = (insn >> 8) & 0xf; @@ -4982,104 +5044,44 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) + if (size == 3 && (interleave | spacing) != 1) { return 1; + } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
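+ * For example, a VLD1 of 16-bit elements (interleave == 1) is performed
+ * below as a single 64-bit little-endian load per D register.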
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(1 << size); load_reg_var(s, addr, rn); - stride = (1 << size) * interleave; for (reg = 0; reg < nregs; reg++) { - if (interleave > 2 || (interleave == 2 && nregs == 2)) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, (1 << size) * reg); - } else if (interleave == 2 && nregs == 4 && reg == 2) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, 1 << size); - } - if (size == 3) { - tmp64 = tcg_temp_new_i64(); - if (load) { - gen_aa32_ld64(s, tmp64, addr, get_mem_index(s)); - neon_store_reg64(tmp64, rd); - } else { - neon_load_reg64(tmp64, rd); - gen_aa32_st64(s, tmp64, addr, get_mem_index(s)); - } - tcg_temp_free_i64(tmp64); - tcg_gen_addi_i32(addr, addr, stride); - } else { - for (pass = 0; pass < 2; pass++) { - if (size == 2) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - } - tcg_gen_addi_i32(addr, addr, stride); - } else if (size == 1) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tmp2 = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - gen_aa32_st16(s, tmp2, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_addi_i32(addr, addr, stride); - } - } else /* size == 0 */ { - if (load) { - tmp2 = NULL; - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n == 0) { - tmp2 = tmp; - } else { - tcg_gen_shli_i32(tmp, tmp, n * 8); - tcg_gen_or_i32(tmp2, tmp2, tmp); - tcg_temp_free_i32(tmp); - } - } - neon_store_reg(rd, pass, tmp2); - } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - } - tcg_temp_free_i32(tmp2); - } + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = rd + reg + spacing * xs; + + if (load) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tt, n, size, tmp64); + } else { + neon_load_element64(tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); } + tcg_gen_add_i32(addr, addr, tmp2); } } - rd += spacing; } tcg_temp_free_i32(addr); - stride = nregs * 8; + tcg_temp_free_i32(tmp2); + tcg_temp_free_i64(tmp64); + stride = nregs * interleave * 8; } else { size = (insn >> 10) & 3; if (size == 3) { @@ -5106,45 +5108,50 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); - if (nregs == 1) { - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ - tmp = gen_load_and_replicate(s, addr, 
size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - if (insn & (1 << 5)) { - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); - } - tcg_temp_free_i32(tmp); - } else { - /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ - stride = (insn & (1 << 5)) ? 2 : 1; - for (reg = 0; reg < nregs; reg++) { - tmp = gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, 1 << size); - rd += stride; + + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. + * VLD2/3/4 to all lanes: bit 5 indicates register stride. + */ + stride = (insn & (1 << 5)) ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((rd & 1) && vec_size == 16) { + /* We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), + neon_reg_offset(rd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + vec_size, vec_size, tmp); } + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ int idx = (insn >> 4) & 0xf; - pass = (insn >> 7) & 1; + int reg_idx; switch (size) { case 0: - shift = ((insn >> 5) & 3) * 8; + reg_idx = (insn >> 5) & 7; stride = 1; break; case 1: - shift = ((insn >> 6) & 1) * 16; + reg_idx = (insn >> 6) & 3; stride = (insn & (1 << 5)) ? 2 : 1; break; case 2: - shift = 0; + reg_idx = (insn >> 7) & 1; stride = (insn & (1 << 6)) ? 2 : 1; break; default: @@ -5184,52 +5191,24 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ return 1; } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - if (size != 2) { - tmp2 = neon_load_reg(rd, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, - shift, size ? 16 : 8); - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + neon_store_element(rd, reg_idx, size, tmp); } else { /* Store */ - tmp = neon_load_reg(rd, pass); - if (shift) - tcg_gen_shri_i32(tmp, tmp, shift); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - } - tcg_temp_free_i32(tmp); + neon_load_element(tmp, rd, reg_idx, size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); } rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); stride = nregs * (1 << size); } } @@ -5250,14 +5229,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) return 0; } -/* Bitwise select. dest = c ? 
t : f. Clobbers T and F. */ -static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c) -{ - tcg_gen_and_i32(t, t, c); - tcg_gen_andc_i32(f, f, c); - tcg_gen_or_i32(dest, t, f); -} - static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) { switch (size) { @@ -5464,7 +5435,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VABA 15 #define NEON_3R_VADD_VSUB 16 #define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VML 18 /* VMLA, VMLS */ #define NEON_3R_VMUL 19 #define NEON_3R_VPMAX 20 #define NEON_3R_VPMIN 21 @@ -5689,7 +5660,7 @@ static const uint8_t neon_2rm_sizes[] = { static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm) { - if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (dc_isar_feature(aa32_rdm, s)) { int opr_sz = (1 + q) * 8; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), vfp_reg_offset(1, rn), @@ -5700,6 +5671,483 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, return 1; } +/* + * Expanders for VBitOps_VBIF, VBIT, VBSL. + */ +static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rm); + tcg_gen_and_i64(rn, rn, rd); + tcg_gen_xor_i64(rd, rm, rn); +} + +static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_and_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_andc_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rm); + tcg_gen_and_vec(vece, rn, rn, rd); + tcg_gen_xor_vec(vece, rd, rm, rn); +} + +static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_and_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_andc_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +const GVecGen3 bsl_op = { + .fni8 = gen_bsl_i64, + .fniv = gen_bsl_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bit_op = { + .fni8 = gen_bit_i64, + .fniv = gen_bit_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bif_op = { + .fni8 = gen_bif_i64, + .fniv = gen_bif_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i ssra_op[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = 
gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_64 }, +}; + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i usra_op[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64, }, +}; + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sri_op[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64 }, +}; + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sli_op[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_64 }, +}; + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. 
+ */ +const GVecGen3 mla_op[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +const GVecGen3 mls_op[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +/* CMTST : test is "if (X & Y != 0)". */ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_and_i32(d, a, b); + tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i32(d, d); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_and_i64(d, a, b); + tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_and_vec(vece, d, a, b); + tcg_gen_dupi_vec(vece, a, 0); + tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); +} + +const GVecGen3 cmtst_op[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5709,14 +6157,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { int op; int q; - int rd, rn, rm; + int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; int shift; int pass; int count; int pairwise; int u; - uint32_t imm, mask; + int vec_size; + uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; TCGv_ptr ptr1, ptr2, ptr3; TCGv_i64 tmp64; @@ -5727,7 +6176,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -5739,6 +6188,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); VFP_DREG_M(rm, insn); size = (insn >> 20) & 3; + vec_size = q ? 16 : 8; + rd_ofs = neon_reg_offset(rd, 0); + rn_ofs = neon_reg_offset(rn, 0); + rm_ofs = neon_reg_offset(rm, 0); + if ((insn & (1 << 23)) == 0) { /* Three register same length. 
*/ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); @@ -5763,7 +6217,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } if (!u) { /* SHA-1 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5773,7 +6227,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4); tcg_temp_free_i32(tmp4); } else { /* SHA-256 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) { + if (!dc_isar_feature(aa32_sha2, s) || size == 3) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5829,8 +6283,100 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) q, rd, rn, rm); } return 1; + + case NEON_3R_LOGIC: /* Logic ops. */ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 1: /* VBIC */ + tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 2: + if (rn == rm) { + /* VMOV */ + tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); + } else { + /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + break; + case 3: /* VORN */ + tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 4: /* VEOR */ + tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 5: /* VBSL */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bsl_op); + break; + case 6: /* VBIT */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bit_op); + break; + case 7: /* VBIF */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bif_op); + break; + } + return 0; + + case NEON_3R_VADD_VSUB: + if (u) { + tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + + case NEON_3R_VMUL: /* VMUL */ + if (u) { + /* Polynomial case allows only P8 and is handled below. */ + if (size != 0) { + return 1; + } + } else { + tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + return 0; + } + break; + + case NEON_3R_VML: /* VMLA, VMLS */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + u ? &mls_op[size] : &mla_op[size]); + return 0; + + case NEON_3R_VTST_VCEQ: + if (u) { /* VCEQ */ + tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { /* VTST */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &cmtst_op[size]); + } + return 0; + + case NEON_3R_VCGT: + tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; + + case NEON_3R_VCGE: + tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; } - if (size == 3 && op != NEON_3R_LOGIC) { + + if (size == 3) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 
2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); @@ -5886,13 +6432,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_sub_i64(CPU_V001); - } else { - tcg_gen_add_i64(CPU_V001); - } - break; default: abort(); } @@ -5942,12 +6481,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } break; - case NEON_3R_VMUL: - if (u && (size != 0)) { - /* UNDEF on invalid size for polynomial subcase */ - return 1; - } - break; case NEON_3R_VFM_VQRDMLSH: if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { return 1; @@ -5988,52 +6521,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - case 1: /* BIC */ - tcg_gen_andc_i32(tmp, tmp, tmp2); - break; - case 2: /* VORR */ - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - case 3: /* VORN */ - tcg_gen_orc_i32(tmp, tmp, tmp2); - break; - case 4: /* VEOR */ - tcg_gen_xor_i32(tmp, tmp, tmp2); - break; - case 5: /* VBSL */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - break; - case 6: /* VBIT */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp3, tmp2); - tcg_temp_free_i32(tmp3); - break; - case 7: /* VBIF */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp3, tmp, tmp2); - tcg_temp_free_i32(tmp3); - break; - } - break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; case NEON_3R_VQSUB: GEN_NEON_INTEGER_OP_ENV(qsub); break; - case NEON_3R_VCGT: - GEN_NEON_INTEGER_OP(cgt); - break; - case NEON_3R_VCGE: - GEN_NEON_INTEGER_OP(cge); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; @@ -6061,61 +6554,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case NEON_3R_VADD_VSUB: - if (!u) { /* VADD */ - gen_neon_add(size, tmp, tmp2); - } else { /* VSUB */ - switch (size) { - case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VTST_VCEQ: - if (!u) { /* VTST */ - switch (size) { - case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } else { /* VCEQ */ - switch (size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; case NEON_3R_VMUL: - if (u) { /* polynomial */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - } else { /* Integer */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, 
tmp2); break; - default: abort(); - } - } + /* VMUL.P8; other cases already eliminated. */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); @@ -6297,8 +6738,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - /* To avoid excessive duplication of ops we implement shift - by immediate using the variable shift operations. */ if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ @@ -6310,43 +6749,99 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* Right shifts are encoded as N - shift, where N is the element size in bits. */ - if (op <= 4) + if (op <= 4) { shift = shift - (1 << (size + 3)); + } + + switch (op) { + case 0: /* VSHR */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, + MIN(shift, (8 << size) - 1), + vec_size, vec_size); + } else if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + return 0; + + case 1: /* VSRA */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + MIN(shift, (8 << size) - 1), + &ssra_op[size]); + } else if (shift >= 8 << size) { + /* rd += 0 */ + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &usra_op[size]); + } + return 0; + + case 4: /* VSRI */ + if (!u) { + return 1; + } + /* Right shift comes here negative. */ + shift = -shift; + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &sri_op[size]); + } + return 0; + + case 5: /* VSHL, VSLI */ + if (u) { /* VSLI */ + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, + vec_size, shift, &sli_op[size]); + } + } else { /* VSHL */ + /* Shifts larger than the element size are + * architecturally valid and results in zero. + */ + if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + } + return 0; + } + if (size == 3) { count = q + 1; } else { count = q ? 4: 2; } - switch (size) { - case 0: - imm = (uint8_t) shift; - imm |= imm << 8; - imm |= imm << 16; - break; - case 1: - imm = (uint16_t) shift; - imm |= imm << 16; - break; - case 2: - case 3: - imm = shift; - break; - default: - abort(); - } + + /* To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
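+ * dup_const() replicates the shift amount into every element lane of the
+ * constant, e.g. dup_const(MO_16, -3) yields 0xfffdfffdfffdfffd.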
+ */ + imm = dup_const(size, shift); for (pass = 0; pass < count; pass++) { if (size == 3) { neon_load_reg64(cpu_V0, rm + pass); tcg_gen_movi_i64(cpu_V1, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ if (u) @@ -6354,10 +6849,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) else gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 6: /* VQSHLU */ gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); @@ -6371,26 +6862,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V0, cpu_V1); } break; + default: + g_assert_not_reached(); } - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - neon_load_reg64(cpu_V1, rd + pass); - uint64_t mask; - if (shift < -63 || shift > 63) { - mask = 0; - } else { - if (op == 4) { - mask = 0xffffffffffffffffull >> -shift; - } else { - mask = 0xffffffffffffffffull << shift; - } - } - tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ @@ -6399,23 +6877,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - GEN_NEON_INTEGER_OP(shl); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ GEN_NEON_INTEGER_OP(rshl); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - switch (size) { - case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: abort(); - } - break; case 6: /* VQSHLU */ switch (size) { case 0: @@ -6437,50 +6902,16 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 7: /* VQSHL */ GEN_NEON_INTEGER_OP_ENV(qshl); break; + default: + g_assert_not_reached(); } tcg_temp_free_i32(tmp2); - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. 
*/ tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); tcg_temp_free_i32(tmp2); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - switch (size) { - case 0: - if (op == 4) - mask = 0xff >> -shift; - else - mask = (uint8_t)(0xff << shift); - mask |= mask << 8; - mask |= mask << 16; - break; - case 1: - if (op == 4) - mask = 0xffff >> -shift; - else - mask = (uint16_t)(0xffff << shift); - mask |= mask << 16; - break; - case 2: - if (shift < -31 || shift > 31) { - mask = 0; - } else { - if (op == 4) - mask = 0xffffffffu >> -shift; - else - mask = 0xffffffffu << shift; - } - break; - default: - abort(); - } - tmp2 = neon_load_reg(rd, pass); - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_andi_i32(tmp2, tmp2, ~mask); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } @@ -6629,7 +7060,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } } else { /* (insn & 0x00380080) == 0 */ - int invert; + int invert, reg_ofs, vec_size; + if (q && (rd & 1)) { return 1; } @@ -6669,8 +7101,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; case 14: imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) + if (invert) { imm = ~imm; + } break; case 15: if (invert) { @@ -6680,36 +7113,45 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; } - if (invert) + if (invert) { imm = ~imm; + } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op & 1 && op < 12) { - tmp = neon_load_reg(rd, pass); - if (invert) { - /* The immediate value has already been inverted, so - BIC becomes AND. */ - tcg_gen_andi_i32(tmp, tmp, imm); - } else { - tcg_gen_ori_i32(tmp, tmp, imm); - } + reg_ofs = neon_reg_offset(rd, 0); + vec_size = q ? 16 : 8; + + if (op & 1 && op < 12) { + if (invert) { + /* The immediate value has already been inverted, + * so BIC becomes AND. + */ + tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); } else { - /* VMOV, VMVN. */ - tmp = tcg_temp_new_i32(); - if (op == 14 && invert) { + tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); + } + } else { + /* VMOV, VMVN. 
*/ + if (op == 14 && invert) { + TCGv_i64 t64 = tcg_temp_new_i64(); + + for (pass = 0; pass <= q; ++pass) { + uint64_t val = 0; int n; - uint32_t val; - val = 0; - for (n = 0; n < 4; n++) { - if (imm & (1 << (n + (pass & 1) * 4))) - val |= 0xff << (n * 8); + + for (n = 0; n < 8; n++) { + if (imm & (1 << (n + pass * 8))) { + val |= 0xffull << (n * 8); + } } - tcg_gen_movi_i32(tmp, val); - } else { - tcg_gen_movi_i32(tmp, imm); + tcg_gen_movi_i64(t64, val); + neon_store_reg64(t64, rd + pass); } + tcg_temp_free_i64(t64); + } else { + tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm); } - neon_store_reg(rd, pass, tmp); } } } else { /* (insn & 0x00800010 == 0x00800000) */ @@ -6768,7 +7210,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (op == 14 && size == 2) { TCGv_i64 tcg_rn, tcg_rm, tcg_rd; - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa32_pmull, s)) { return 1; } tcg_rn = tcg_temp_new_i64(); @@ -7085,7 +7527,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { NeonGenThreeOpEnvFn *fn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa32_rdm, s)) { return 1; } if (u && ((rd | rn) & 1)) { @@ -7359,8 +7801,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; } case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7381,8 +7822,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7399,10 +7839,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ if (q) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) { + if (!dc_isar_feature(aa32_sha2, s)) { return 1; } - } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7415,6 +7855,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(ptr1); tcg_temp_free_ptr(ptr2); break; + + case NEON_2RM_VMVN: + tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); + break; + case NEON_2RM_VNEG: + tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size); + break; + default: elementwise: for (pass = 0; pass < (q ? 
4 : 2); pass++) { @@ -7455,9 +7903,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case NEON_2RM_VMVN: - tcg_gen_not_i32(tmp, tmp); - break; case NEON_2RM_VQABS: switch (size) { case 0: @@ -7530,11 +7975,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) default: abort(); } break; - case NEON_2RM_VNEG: - tmp2 = tcg_const_i32(0); - gen_neon_rsb(size, tmp, tmp2); - tcg_temp_free_i32(tmp2); - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); @@ -7757,28 +8197,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + int element; + TCGMemOp size; + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { return 1; } - if (insn & (1 << 19)) { - tmp = neon_load_reg(rm, 1); - } else { - tmp = neon_load_reg(rm, 0); - } if (insn & (1 << 16)) { - gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); + size = MO_8; + element = (insn >> 17) & 7; } else if (insn & (1 << 17)) { - if ((insn >> 18) & 1) - gen_neon_dup_high16(tmp); - else - gen_neon_dup_low16(tmp); - } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, pass, tmp2); + size = MO_16; + element = (insn >> 18) & 3; + } else { + size = MO_32; + element = (insn >> 19) & 1; } - tcg_temp_free_i32(tmp); + tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), + neon_element_offset(rm, element, size), + q ? 16 : 8, q ? 16 : 8); } else { return 1; } @@ -7813,8 +8250,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 23, 2); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; @@ -7822,15 +8259,15 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 24, 1); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; } else if ((insn & 0xfeb00f00) == 0xfc200d00) { /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ bool u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? 
gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; @@ -7840,7 +8277,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -7892,11 +8329,11 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) int size = extract32(insn, 23, 1); int index; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa32_vcma, s)) { return 1; } if (size == 0) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { return 1; } /* For fp16, rm is just Vm, and index is M. */ @@ -7913,7 +8350,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) } else if ((insn & 0xffb00f00) == 0xfe200d00) { /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ int u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; @@ -7926,7 +8363,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -8889,8 +9326,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. * Bits 8, 10 and 11 should be zero. */ - if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 || - (c & 0xd) != 0) { + if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) { goto illegal_op; } @@ -9758,7 +10194,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) case 1: case 3: /* SDIV, UDIV */ - if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) { + if (!dc_isar_feature(arm_div, s)) { goto illegal_op; } if (((insn >> 5) & 7) || (rd != 15)) { @@ -10785,7 +11221,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) case 0x28: case 0x29: case 0x2a: - if (!arm_dc_feature(s, ARM_FEATURE_CRC)) { + if (!dc_isar_feature(aa32_crc32, s)) { goto illegal_op; } break; @@ -10966,7 +11402,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((op & 0x50) == 0x10) { /* sdiv, udiv */ - if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) { + if (!dc_isar_feature(thumb_div, s)) { goto illegal_op; } if (op & 0x20) @@ -12586,6 +13022,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUARMState *env = cs->env_ptr; ARMCPU *cpu = arm_env_get_cpu(env); + dc->isar = &cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -12703,7 +13140,6 @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) tcg_gen_movi_i32(tmp, 0); store_cpu_field(tmp, condexec_bits); } - tcg_clear_temp_count(); } static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) @@ -13092,11 +13528,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) translator_loop(ops, &dc.base, cpu, tb); } -static const char *cpu_mode_names[16] = { - "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", - "???", "???", "hyp", "und", "???", "???", "???", "sys" -}; - void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -13162,7 +13593,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function 
cpu_fprintf, psr & CPSR_V ? 'V' : '-', psr & CPSR_T ? 'T' : 'A', ns_status, - cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); + aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26); } if (flags & CPU_DUMP_FPU) { diff --git a/target/arm/translate.h b/target/arm/translate.h index c1b65f3efb..1550aa8bc7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -7,6 +7,7 @@ /* internal defines */ typedef struct DisasContext { DisasContextBase base; + const ARMISARegisters *isar; target_ulong pc; target_ulong page_start; @@ -190,4 +191,24 @@ static inline TCGv_i32 get_ahp_flag(void) return ret; } + +/* Vector operations shared between ARM and AArch64. */ +extern const GVecGen3 bsl_op; +extern const GVecGen3 bit_op; +extern const GVecGen3 bif_op; +extern const GVecGen3 mla_op[4]; +extern const GVecGen3 mls_op[4]; +extern const GVecGen3 cmtst_op[4]; +extern const GVecGen2i ssra_op[4]; +extern const GVecGen2i usra_op[4]; +extern const GVecGen2i sri_op[4]; +extern const GVecGen2i sli_op[4]; +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +/* + * Forward to the isar_feature_* tests given a DisasContext pointer. + */ +#define dc_isar_feature(name, ctx) \ + ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); }) + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/target/cris/translate.c b/target/cris/translate.c index 4ae1c04daf..11b2c11174 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -137,11 +137,7 @@ typedef struct DisasContext { static void gen_BUG(DisasContext *dc, const char *file, int line) { - fprintf(stderr, "BUG: pc=%x %s %d\n", dc->pc, file, line); - if (qemu_log_separate()) { - qemu_log("BUG: pc=%x %s %d\n", dc->pc, file, line); - } - cpu_abort(CPU(dc->cpu), "%s:%d\n", file, line); + cpu_abort(CPU(dc->cpu), "%s:%d pc=%x\n", file, line, dc->pc); } static const char *regnames_v32[] = diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index ab160c2a74..aecf3075f6 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -137,7 +137,8 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, if (unlikely(!(prot & type))) { /* The access isn't allowed -- Inst/Data Memory Protection Fault. */ - ret = (type & PAGE_EXEC ? EXCP_IMP : EXCP_DMP); + ret = (type & PAGE_EXEC ? EXCP_IMP : + prot & PAGE_READ ? 
EXCP_DMP : EXCP_DMAR); goto egress; } diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index 04678f5503..32bf966300 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -3,17 +3,20 @@ obj-$(CONFIG_TCG) += translate.o obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o -obj-$(CONFIG_KVM) += kvm.o hyperv.o -obj-$(CONFIG_SEV) += sev.o +ifeq ($(CONFIG_SOFTMMU),y) +obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o +obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o -# HAX support -ifdef CONFIG_WIN32 +obj-$(CONFIG_HYPERV) += hyperv.o +obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o +ifeq ($(CONFIG_WIN32),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o endif -ifdef CONFIG_DARWIN +ifeq ($(CONFIG_DARWIN),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o obj-$(CONFIG_HVF) += hvf/ endif obj-$(CONFIG_WHPX) += whpx-all.o +endif +obj-$(CONFIG_SEV) += sev.o +obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..1469a1be01 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5123,14 +5123,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise * cs->nr_threads hasn't be populated yet and the checking is incorrect. */ - if (IS_AMD_CPU(env) && - !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && - cs->nr_threads > 1 && !ht_warned) { - error_report("This family of AMD CPU doesn't support " - "hyperthreading(%d). Please configure -smp " - "options properly or try enabling topoext feature.", - cs->nr_threads); - ht_warned = true; + if (IS_AMD_CPU(env) && + !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && + cs->nr_threads > 1 && !ht_warned) { + warn_report("This family of AMD CPU doesn't support " + "hyperthreading(%d)", + cs->nr_threads); + error_printf("Please configure -smp options properly" + " or try enabling topoext feature.\n"); + ht_warned = true; } x86_cpu_apic_realize(cpu, &local_err); @@ -5564,6 +5565,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), @@ -5606,6 +5608,8 @@ static Property x86_cpu_properties[] = { * to the specific Windows version being used." 
*/ DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), + DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, + false), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..663f3a5e67 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1378,10 +1378,12 @@ struct X86CPU { bool hyperv_vpindex; bool hyperv_runtime; bool hyperv_synic; + bool hyperv_synic_kvm_only; bool hyperv_stimer; bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 9f52bc413a..e193022c03 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -73,7 +73,6 @@ #include "target/i386/cpu.h" HVFState *hvf_state; -int hvf_disabled = 1; static void assert_hvf_ok(hv_return_t ret) { @@ -604,11 +603,6 @@ int hvf_init_vcpu(CPUState *cpu) return 0; } -void hvf_disable(int shouldDisable) -{ - hvf_disabled = shouldDisable; -} - static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -934,7 +928,7 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -static bool hvf_allowed; +bool hvf_allowed; static int hvf_accel_init(MachineState *ms) { @@ -942,7 +936,6 @@ static int hvf_accel_init(MachineState *ms) hv_return_t ret; HVFState *s; - hvf_disable(0); ret = hv_vm_create(HV_VM_DEFAULT); assert_hvf_ok(ret); diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 2d7540fe7c..2e33b69541 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -113,7 +113,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; - op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -263,16 +265,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -288,16 +290,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = 
get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -378,16 +380,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -402,8 +404,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -412,7 +414,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -639,8 +642,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -1686,7 +1689,8 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong ptr = 0; int which = 0; @@ -1698,7 +1702,7 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) switch (size) { case 1: - if (is_extended || reg < 4) { + if (is_extended || reg < 4 || rex) { which = 1; ptr = (target_ulong)&RL(env, reg); } else { @@ -1714,10 +1718,11 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong val = 0; - memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size); + memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, 
size), size); return val; } @@ -1739,7 +1744,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, if (base_reg == R_ESP || base_reg == R_EBP) { *sel = R_SS; } - base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size); + base = get_reg_val(env, decode->sib.base, decode->rex.rex, + decode->rex.b, addr_size); } if (decode->rex.x) { @@ -1747,7 +1753,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, } if (index_reg != R_ESP) { - scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) << + scaled_index = get_reg_val(env, index_reg, decode->rex.rex, + decode->rex.x, addr_size) << decode->sib.scale; } return base + scaled_index; @@ -1776,7 +1783,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) { seg = R_SS; } - ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size); + ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, addr_size); } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1805,7 +1813,8 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } else if (0 == mod && 5 == rm) { ptr = RIP(env) + decode->len + (int32_t) offset; } else { - ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset; + ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) + + (int64_t) offset; } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1822,8 +1831,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b, - decode->operand_size); + op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index 5ab6f31fa5..ef4bcab310 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size); -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size); +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size); +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op); target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..8c572cd7c2 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -1,7 +1,7 @@ /* - * Definitions for Hyper-V guest/hypervisor interaction + * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part * - * Copyright (C) 2017 Parallels International GmbH + * Copyright (c) 2017-2018 Virtuozzo International GmbH. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. 
@@ -10,7 +10,7 @@ #ifndef TARGET_I386_HYPERV_PROTO_H #define TARGET_I386_HYPERV_PROTO_H -#include "qemu/bitmap.h" +#include "hw/hyperv/hyperv-proto.h" #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HV_CPUID_INTERFACE 0x40000001 @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* @@ -138,25 +139,6 @@ #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 /* - * Hypercall status code - */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * Hypercall numbers - */ -#define HV_POST_MESSAGE 0x005c -#define HV_SIGNAL_EVENT 0x005d -#define HV_HYPERCALL_FAST (1u << 16) - -/* * Hypercall MSR bits */ #define HV_HYPERCALL_ENABLE (1u << 0) @@ -165,7 +147,6 @@ * Synthetic interrupt controller definitions */ #define HV_SYNIC_VERSION 1 -#define HV_SINT_COUNT 16 #define HV_SYNIC_ENABLE (1u << 0) #define HV_SIMP_ENABLE (1u << 0) #define HV_SIEFP_ENABLE (1u << 0) @@ -175,94 +156,5 @@ #define HV_STIMER_COUNT 4 -/* - * Message size - */ -#define HV_MESSAGE_PAYLOAD_SIZE 240 - -/* - * Message types - */ -#define HV_MESSAGE_NONE 0x00000000 -#define HV_MESSAGE_VMBUS 0x00000001 -#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 -#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 -#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 -#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 -#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 -#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 -#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 -#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 -#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 -#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 -#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 -#define HV_MESSAGE_X64_APIC_EOI 0x80010004 -#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 - -/* - * Message flags - */ -#define HV_MESSAGE_FLAG_PENDING 0x1 - -/* - * Event flags number per SINT - */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) - -/* - * Connection id valid bits - */ -#define HV_CONNECTION_ID_MASK 0x00ffffff - -/* - * Input structure for POST_MESSAGE hypercall - */ -struct hyperv_post_message_input { - uint32_t connection_id; - uint32_t _reserved; - uint32_t message_type; - uint32_t payload_size; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -/* - * Input structure for SIGNAL_EVENT hypercall - */ -struct hyperv_signal_event_input { - uint32_t connection_id; - uint16_t flag_number; - uint16_t _reserved_zero; -}; - -/* - * SynIC message structures - */ -struct hyperv_message_header { - uint32_t message_type; - uint8_t payload_size; - uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ - uint8_t _reserved[2]; - uint64_t sender; -}; - -struct hyperv_message { - struct hyperv_message_header header; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -struct hyperv_message_page { - struct hyperv_message slot[HV_SINT_COUNT]; -}; - -/* - * SynIC event flags structures - */ -struct hyperv_event_flags { - DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); -}; - -struct hyperv_event_flags_page { - struct hyperv_event_flags slot[HV_SINT_COUNT]; -}; #endif diff --git a/target/i386/hyperv-stub.c 
b/target/i386/hyperv-stub.c new file mode 100644 index 0000000000..fe548cbae2 --- /dev/null +++ b/target/i386/hyperv-stub.c @@ -0,0 +1,48 @@ +/* + * Stubs for CONFIG_HYPERV=n + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!cpu->hyperv_synic) { + return -1; + } + + /* + * Tracking the changes in the MSRs is unnecessary as there are no + * users for them beside save/load, which is handled nicely by the + * generic MSR save/load code + */ + return 0; + case KVM_EXIT_HYPERV_HCALL: + exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; + return 0; + default: + return -1; + } +} +#endif + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + return -ENOSYS; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ +} diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 3065d765ed..b264a28620 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -14,16 +14,36 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "hyperv.h" +#include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" -uint32_t hyperv_vp_index(X86CPU *cpu) +int hyperv_x86_synic_add(X86CPU *cpu) { - return CPU(cpu)->cpu_index; + hyperv_synic_add(CPU(cpu)); + return 0; } -X86CPU *hyperv_find_vcpu(uint32_t vp_index) +void hyperv_x86_synic_reset(X86CPU *cpu) { - return X86_CPU(qemu_get_cpu(vp_index)); + hyperv_synic_reset(CPU(cpu)); +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE; + hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ? + (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0; + hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ? + (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0; + hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); +} + +static void async_synic_update(CPUState *cs, run_on_cpu_data data) +{ + qemu_mutex_lock_iothread(); + hyperv_x86_synic_update(X86_CPU(cs)); + qemu_mutex_unlock_iothread(); } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) @@ -36,11 +56,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } - /* - * For now just track changes in SynIC control and msg/evt pages msr's. - * When SynIC messaging/events processing will be added in future - * here we will do messages queues flushing and pages remapping. - */ switch (exit->u.synic.msr) { case HV_X64_MSR_SCONTROL: env->msr_hv_synic_control = exit->u.synic.control; @@ -54,98 +69,33 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) default: return -1; } + + /* + * this will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. 
when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); + return 0; case KVM_EXIT_HYPERV_HCALL: { - uint16_t code; + uint16_t code = exit->u.hcall.input & 0xffff; + bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; + uint64_t param = exit->u.hcall.params[0]; - code = exit->u.hcall.input & 0xffff; switch (code) { case HV_POST_MESSAGE: + exit->u.hcall.result = hyperv_hcall_post_message(param, fast); + break; case HV_SIGNAL_EVENT: + exit->u.hcall.result = hyperv_hcall_signal_event(param, fast); + break; default: exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; - return 0; } + return 0; } default: return -1; } } - -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) -{ - HvSintRoute *sint_route = container_of(notifier, HvSintRoute, - sint_ack_notifier); - event_notifier_test_and_clear(notifier); - if (sint_route->sint_ack_clb) { - sint_route->sint_ack_clb(sint_route); - } -} - -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb) -{ - HvSintRoute *sint_route; - int r, gsi; - - sint_route = g_malloc0(sizeof(*sint_route)); - r = event_notifier_init(&sint_route->sint_set_notifier, false); - if (r) { - goto err; - } - - r = event_notifier_init(&sint_route->sint_ack_notifier, false); - if (r) { - goto err_sint_set_notifier; - } - - event_notifier_set_handler(&sint_route->sint_ack_notifier, - kvm_hv_sint_ack_handler); - - gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); - if (gsi < 0) { - goto err_gsi; - } - - r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - &sint_route->sint_ack_notifier, gsi); - if (r) { - goto err_irqfd; - } - sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->vp_index = vp_index; - sint_route->sint = sint; - - return sint_route; - -err_irqfd: - kvm_irqchip_release_virq(kvm_state, gsi); -err_gsi: - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); -err_sint_set_notifier: - event_notifier_cleanup(&sint_route->sint_set_notifier); -err: - g_free(sint_route); - - return NULL; -} - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) -{ - kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - sint_route->gsi); - kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); - event_notifier_cleanup(&sint_route->sint_set_notifier); - g_free(sint_route); -} - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route) -{ - return event_notifier_set(&sint_route->sint_set_notifier); -} diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 00c9b454bb..67543296c3 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -16,30 +16,14 @@ #include "cpu.h" #include "sysemu/kvm.h" -#include "qemu/event_notifier.h" - -typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(HvSintRoute *sint_route); - -struct HvSintRoute { - uint32_t sint; - uint32_t vp_index; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; -}; +#include "hw/hyperv/hyperv.h" +#ifdef CONFIG_KVM int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +#endif -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb 
sint_ack_clb); - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); - -uint32_t hyperv_vp_index(X86CPU *cpu); -X86CPU *hyperv_find_vcpu(uint32_t vp_index); +int hyperv_x86_synic_add(X86CPU *cpu); +void hyperv_x86_synic_reset(X86CPU *cpu); +void hyperv_x86_synic_update(X86CPU *cpu); #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..115d8b4c14 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_synic || cpu->hyperv_stimer || cpu->hyperv_reenlightenment || - cpu->hyperv_tlbflush); + cpu->hyperv_tlbflush || + cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -733,9 +734,20 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; } if (cpu->hyperv_synic) { - if (!has_msr_hv_synic || - kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { - fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); + unsigned int cap = KVM_CAP_HYPERV_SYNIC; + if (!cpu->hyperv_synic_kvm_only) { + if (!cpu->hyperv_vpindex) { + fprintf(stderr, "Hyper-V SynIC " + "(requested by 'hv-synic' cpu flag) " + "requires Hyper-V VP_INDEX ('hv-vpindex')\n"); + return -ENOSYS; + } + cap = KVM_CAP_HYPERV_SYNIC2; + } + + if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) { + fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) " + "is not supported by kernel\n"); return -ENOSYS; } @@ -753,12 +765,14 @@ static int hyperv_handle_properties(CPUState *cs) static int hyperv_init_vcpu(X86CPU *cpu) { + CPUState *cs = CPU(cpu); + int ret; + if (cpu->hyperv_vpindex && !hv_vpindex_settable) { /* * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - int ret; struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; @@ -767,18 +781,38 @@ static int hyperv_init_vcpu(X86CPU *cpu) .entries[0].index = HV_X64_MSR_VP_INDEX, }; - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); if (ret < 0) { return ret; } assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(cpu)) { + if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } } + if (cpu->hyperv_synic) { + uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? 
+ KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); + if (ret < 0) { + error_report("failed to turn on HyperV SynIC in KVM: %s", + strerror(-ret)); + return ret; + } + + if (!cpu->hyperv_synic_kvm_only) { + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } + } + } + return 0; } @@ -888,6 +922,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_CLUSTER_IPI_RECOMMENDED; + c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; @@ -1153,7 +1198,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (local_err) { error_report_err(local_err); error_free(invtsc_mig_blocker); - goto fail; + return r; } /* for savevm */ vmstate_x86_cpu.unmigratable = 1; @@ -1226,6 +1271,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { env->msr_hv_synic_sint[i] = HV_SINT_MASKED; } + + hyperv_x86_synic_reset(cpu); } } @@ -1937,7 +1984,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); } if (cpu->hyperv_vpindex && hv_vpindex_settable) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu)); + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, + hyperv_vp_index(CPU(cpu))); } if (cpu->hyperv_synic) { int j; @@ -2686,7 +2734,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; - events.exception.pad = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; @@ -2695,7 +2742,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.nmi.injected = env->nmi_injected; events.nmi.pending = env->nmi_pending; events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); - events.nmi.pad = 0; events.sipi_vector = env->sipi_vector; events.flags = 0; diff --git a/target/i386/machine.c b/target/i386/machine.c index 084c2c73a8..225b5d433b 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,6 +7,7 @@ #include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" +#include "hyperv.h" #include "sysemu/kvm.h" @@ -672,11 +673,19 @@ static bool hyperv_synic_enable_needed(void *opaque) return false; } +static int hyperv_synic_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + hyperv_x86_synic_update(cpu); + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_synic = { .name = "cpu/msr_hyperv_synic", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_synic_enable_needed, + .post_load = hyperv_synic_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU), diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c index 30c26b9d9c..6cc53bcb40 100644 --- a/target/i386/mem_helper.c +++ b/target/i386/mem_helper.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" void 
helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) @@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, ra); - } else { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else + } else if (HAVE_CMPXCHG128) { int eflags = cpu_cc_compute_all(env, CC_OP); Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); @@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) eflags &= ~CC_Z; } CC_SRC = eflags; -#endif + } else { + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } #endif diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 28af4d191c..e48be4b334 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -195,10 +195,125 @@ struct CPUMIPSState { #define MSAIR_ProcID 8 #define MSAIR_Rev 0 +/* + * Summary of CP0 registers + * ======================== + * + * + * Register 0 Register 1 Register 2 Register 3 + * ---------- ---------- ---------- ---------- + * + * 0 Index Random EntryLo0 EntryLo1 + * 1 MVPControl VPEControl TCStatus GlobalNumber + * 2 MVPConf0 VPEConf0 TCBind + * 3 MVPConf1 VPEConf1 TCRestart + * 4 VPControl YQMask TCHalt + * 5 VPESchedule TCContext + * 6 VPEScheFBack TCSchedule + * 7 VPEOpt TCScheFBack TCOpt + * + * + * Register 4 Register 5 Register 6 Register 7 + * ---------- ---------- ---------- ---------- + * + * 0 Context PageMask Wired HWREna + * 1 ContextConfig PageGrain SRSConf0 + * 2 UserLocal SegCtl0 SRSConf1 + * 3 XContextConfig SegCtl1 SRSConf2 + * 4 DebugContextID SegCtl2 SRSConf3 + * 5 MemoryMapID PWBase SRSConf4 + * 6 PWField PWCtl + * 7 PWSize + * + * + * Register 8 Register 9 Register 10 Register 11 + * ---------- ---------- ----------- ----------- + * + * 0 BadVAddr Count EntryHi Compare + * 1 BadInstr + * 2 BadInstrP + * 3 BadInstrX + * 4 GuestCtl1 GuestCtl0Ext + * 5 GuestCtl2 + * 6 GuestCtl3 + * 7 + * + * + * Register 12 Register 13 Register 14 Register 15 + * ----------- ----------- ----------- ----------- + * + * 0 Status Cause EPC PRId + * 1 IntCtl EBase + * 2 SRSCtl NestedEPC CDMMBase + * 3 SRSMap CMGCRBase + * 4 View_IPL View_RIPL BEVVA + * 5 SRSMap2 NestedExc + * 6 GuestCtl0 + * 7 GTOffset + * + * + * Register 16 Register 17 Register 18 Register 19 + * ----------- ----------- ----------- ----------- + * + * 0 Config LLAddr WatchLo WatchHi + * 1 Config1 MAAR WatchLo WatchHi + * 2 Config2 MAARI WatchLo WatchHi + * 3 Config3 WatchLo WatchHi + * 4 Config4 WatchLo WatchHi + * 5 Config5 WatchLo WatchHi + * 6 WatchLo WatchHi + * 7 WatchLo WatchHi + * + * + * Register 20 Register 21 Register 22 Register 23 + * ----------- ----------- ----------- ----------- + * + * 0 XContext Debug + * 1 TraceControl + * 2 TraceControl2 + * 3 UserTraceData1 + * 4 TraceIBPC + * 5 TraceDBPC + * 6 Debug2 + * 7 + * + * + * Register 24 Register 25 Register 26 Register 27 + * ----------- ----------- ----------- ----------- + * + * 0 DEPC PerfCnt ErrCtl CacheErr + * 1 PerfCnt + * 2 TraceControl3 PerfCnt + * 3 UserTraceData2 PerfCnt + * 4 PerfCnt + * 5 PerfCnt + * 6 PerfCnt + * 7 PerfCnt + * + * + * Register 28 Register 29 Register 30 Register 31 + * ----------- ----------- ----------- ----------- + * + * 0 DataLo DataHi ErrorEPC DESAVE + * 1 TagLo TagHi + * 2 DataLo DataHi KScratch<n> + * 3 TagLo TagHi KScratch<n> + * 4 DataLo DataHi KScratch<n> + * 5 TagLo TagHi KScratch<n> + * 6 DataLo DataHi KScratch<n> + * 7 TagLo TagHi KScratch<n> + * + */ +/* + * CP0 Register 0 + */ int32_t CP0_Index; /* CP0_MVP* are per MVP registers. 
*/ int32_t CP0_VPControl; #define CP0VPCtl_DIS 0 +/* + * CP0 Register 1 + */ int32_t CP0_Random; int32_t CP0_VPEControl; #define CP0VPECo_YSI 21 @@ -239,7 +354,13 @@ struct CPUMIPSState { #define CP0VPEOpt_DWX2 2 #define CP0VPEOpt_DWX1 1 #define CP0VPEOpt_DWX0 0 +/* + * CP0 Register 2 + */ uint64_t CP0_EntryLo0; +/* + * CP0 Register 3 + */ uint64_t CP0_EntryLo1; #if defined(TARGET_MIPS64) # define CP0EnLo_RI 63 @@ -250,8 +371,14 @@ struct CPUMIPSState { #endif int32_t CP0_GlobalNumber; #define CP0GN_VPId 0 +/* + * CP0 Register 4 + */ target_ulong CP0_Context; target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM]; +/* + * CP0 Register 5 + */ int32_t CP0_PageMask; int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; @@ -289,7 +416,47 @@ struct CPUMIPSState { #define CP0SC2_XR 56 #define CP0SC2_XR_MASK (0xFFULL << CP0SC2_XR) #define CP0SC2_MASK (CP0SC_1GMASK | (CP0SC_1GMASK << 16) | CP0SC2_XR_MASK) + target_ulong CP0_PWBase; + target_ulong CP0_PWField; +#if defined(TARGET_MIPS64) +#define CP0PF_BDI 32 /* 37..32 */ +#define CP0PF_GDI 24 /* 29..24 */ +#define CP0PF_UDI 18 /* 23..18 */ +#define CP0PF_MDI 12 /* 17..12 */ +#define CP0PF_PTI 6 /* 11..6 */ +#define CP0PF_PTEI 0 /* 5..0 */ +#else +#define CP0PF_GDW 24 /* 29..24 */ +#define CP0PF_UDW 18 /* 23..18 */ +#define CP0PF_MDW 12 /* 17..12 */ +#define CP0PF_PTW 6 /* 11..6 */ +#define CP0PF_PTEW 0 /* 5..0 */ +#endif + target_ulong CP0_PWSize; +#if defined(TARGET_MIPS64) +#define CP0PS_BDW 32 /* 37..32 */ +#endif +#define CP0PS_PS 30 +#define CP0PS_GDW 24 /* 29..24 */ +#define CP0PS_UDW 18 /* 23..18 */ +#define CP0PS_MDW 12 /* 17..12 */ +#define CP0PS_PTW 6 /* 11..6 */ +#define CP0PS_PTEW 0 /* 5..0 */ +/* + * CP0 Register 6 + */ int32_t CP0_Wired; + int32_t CP0_PWCtl; +#define CP0PC_PWEN 31 +#if defined(TARGET_MIPS64) +#define CP0PC_PWDIREXT 30 +#define CP0PC_XK 28 +#define CP0PC_XS 27 +#define CP0PC_XU 26 +#endif +#define CP0PC_DPH 7 +#define CP0PC_HUGEPG 6 +#define CP0PC_PSN 0 /* 5..0 */ int32_t CP0_SRSConf0_rw_bitmask; int32_t CP0_SRSConf0; #define CP0SRSC0_M 31 @@ -319,16 +486,34 @@ struct CPUMIPSState { #define CP0SRSC4_SRS15 20 #define CP0SRSC4_SRS14 10 #define CP0SRSC4_SRS13 0 +/* + * CP0 Register 7 + */ int32_t CP0_HWREna; +/* + * CP0 Register 8 + */ target_ulong CP0_BadVAddr; uint32_t CP0_BadInstr; uint32_t CP0_BadInstrP; uint32_t CP0_BadInstrX; +/* + * CP0 Register 9 + */ int32_t CP0_Count; +/* + * CP0 Register 10 + */ target_ulong CP0_EntryHi; #define CP0EnHi_EHINV 10 target_ulong CP0_EntryHi_ASID_mask; +/* + * CP0 Register 11 + */ int32_t CP0_Compare; +/* + * CP0 Register 12 + */ int32_t CP0_Status; #define CP0St_CU3 31 #define CP0St_CU2 30 @@ -370,6 +555,9 @@ struct CPUMIPSState { #define CP0SRSMap_SSV2 8 #define CP0SRSMap_SSV1 4 #define CP0SRSMap_SSV0 0 +/* + * CP0 Register 13 + */ int32_t CP0_Cause; #define CP0Ca_BD 31 #define CP0Ca_TI 30 @@ -381,12 +569,21 @@ struct CPUMIPSState { #define CP0Ca_IP 8 #define CP0Ca_IP_mask 0x0000FF00 #define CP0Ca_EC 2 +/* + * CP0 Register 14 + */ target_ulong CP0_EPC; +/* + * CP0 Register 15 + */ int32_t CP0_PRid; target_ulong CP0_EBase; target_ulong CP0_EBaseWG_rw_bitmask; #define CP0EBase_WG 11 target_ulong CP0_CMGCRBase; +/* + * CP0 Register 16 + */ int32_t CP0_Config0; #define CP0C0_M 31 #define CP0C0_K23 28 /* 30..28 */ @@ -503,6 +700,9 @@ struct CPUMIPSState { uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; /* XXX: Maybe make LLAddr per-TC? 
*/ +/* + * CP0 Register 17 + */ uint64_t lladdr; target_ulong llval; target_ulong llnewval; @@ -511,11 +711,23 @@ struct CPUMIPSState { target_ulong llreg; uint64_t CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; +/* + * CP0 Register 18 + */ target_ulong CP0_WatchLo[8]; +/* + * CP0 Register 19 + */ int32_t CP0_WatchHi[8]; #define CP0WH_ASID 16 +/* + * CP0 Register 20 + */ target_ulong CP0_XContext; int32_t CP0_Framemask; +/* + * CP0 Register 23 + */ int32_t CP0_Debug; #define CP0DB_DBD 31 #define CP0DB_DM 30 @@ -535,18 +747,40 @@ struct CPUMIPSState { #define CP0DB_DDBL 2 #define CP0DB_DBp 1 #define CP0DB_DSS 0 +/* + * CP0 Register 24 + */ target_ulong CP0_DEPC; +/* + * CP0 Register 25 + */ int32_t CP0_Performance0; +/* + * CP0 Register 26 + */ int32_t CP0_ErrCtl; #define CP0EC_WST 29 #define CP0EC_SPR 28 #define CP0EC_ITC 26 +/* + * CP0 Register 28 + */ uint64_t CP0_TagLo; int32_t CP0_DataLo; +/* + * CP0 Register 29 + */ int32_t CP0_TagHi; int32_t CP0_DataHi; +/* + * CP0 Register 30 + */ target_ulong CP0_ErrorEPC; +/* + * CP0 Register 31 + */ int32_t CP0_DESAVE; + /* We waste some space so we can handle shadow registers like TCs. */ TCState tcs[MIPS_SHADOW_SET_MAX]; CPUMIPSFPUContext fpus[MIPS_FPU_MAX]; @@ -596,8 +830,9 @@ struct CPUMIPSState { #define MIPS_HFLAG_BX 0x40000 /* branch exchanges execution mode */ #define MIPS_HFLAG_BMASK (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT) /* MIPS DSP resources access. */ -#define MIPS_HFLAG_DSP 0x080000 /* Enable access to MIPS DSP resources. */ -#define MIPS_HFLAG_DSPR2 0x100000 /* Enable access to MIPS DSPR2 resources. */ +#define MIPS_HFLAG_DSP 0x080000 /* Enable access to DSP resources. */ +#define MIPS_HFLAG_DSP_R2 0x100000 /* Enable access to DSP R2 resources. */ +#define MIPS_HFLAG_DSP_R3 0x20000000 /* Enable access to DSP R3 resources. */ /* Extra flag about HWREna register. */ #define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */ #define MIPS_HFLAG_SBRI 0x400000 /* R6 SDBBP causes RI excpt. in user mode */ @@ -614,7 +849,7 @@ struct CPUMIPSState { int CCRes; /* Cycle count resolution/divisor */ uint32_t CP0_Status_rw_bitmask; /* Read/write bits in CP0_Status */ uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */ - int insn_flags; /* Supported instruction set */ + uint64_t insn_flags; /* Supported instruction set */ /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; diff --git a/target/mips/helper.c b/target/mips/helper.c index f0c268b83c..8988452dbd 100644 --- a/target/mips/helper.c +++ b/target/mips/helper.c @@ -537,6 +537,342 @@ hwaddr mips_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } #endif +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + +/* + * Perform hardware page table walk + * + * Memory accesses are performed using the KERNEL privilege level. + * Synchronous exceptions detected on memory accesses cause a silent exit + * from page table walking, resulting in a TLB or XTLB Refill exception. + * + * Implementations are not required to support page table walk memory + * accesses from mapped memory regions. When an unsupported access is + * attempted, a silent exit is taken, resulting in a TLB or XTLB Refill + * exception. + * + * Note that if an exception is caused by AddressTranslation or LoadMemory + * functions, the exception is not taken, a silent exit is taken, + * resulting in a TLB or XTLB Refill exception. 
+ */ + +static bool get_pte(CPUMIPSState *env, uint64_t vaddr, int entry_size, + uint64_t *pte) +{ + if ((vaddr & ((entry_size >> 3) - 1)) != 0) { + return false; + } + if (entry_size == 64) { + *pte = cpu_ldq_code(env, vaddr); + } else { + *pte = cpu_ldl_code(env, vaddr); + } + return true; +} + +static uint64_t get_tlb_entry_layout(CPUMIPSState *env, uint64_t entry, + int entry_size, int ptei) +{ + uint64_t result = entry; + uint64_t rixi; + if (ptei > entry_size) { + ptei -= 32; + } + result >>= (ptei - 2); + rixi = result & 3; + result >>= 2; + result |= rixi << CP0EnLo_XI; + return result; +} + +static int walk_directory(CPUMIPSState *env, uint64_t *vaddr, + int directory_index, bool *huge_page, bool *hgpg_directory_hit, + uint64_t *pw_entrylo0, uint64_t *pw_entrylo1) +{ + int dph = (env->CP0_PWCtl >> CP0PC_DPH) & 0x1; + int psn = (env->CP0_PWCtl >> CP0PC_PSN) & 0x3F; + int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1; + int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F; + int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3; + int directory_shift = (ptew > 1) ? -1 : + (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift; + int leaf_shift = (ptew > 1) ? -1 : + (ptew == 1) ? native_shift + 1 : native_shift; + uint32_t direntry_size = 1 << (directory_shift + 3); + uint32_t leafentry_size = 1 << (leaf_shift + 3); + uint64_t entry; + uint64_t paddr; + int prot; + uint64_t lsb = 0; + uint64_t w = 0; + + if (get_physical_address(env, &paddr, &prot, *vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + /* wrong base address */ + return 0; + } + if (!get_pte(env, *vaddr, direntry_size, &entry)) { + return 0; + } + + if ((entry & (1 << psn)) && hugepg) { + *huge_page = true; + *hgpg_directory_hit = true; + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + w = directory_index - 1; + if (directory_index & 0x1) { + /* Generate adjacent page from same PTE for odd TLB page */ + lsb = (1 << w) >> 6; + *pw_entrylo0 = entry & ~lsb; /* even page */ + *pw_entrylo1 = entry | lsb; /* odd page */ + } else if (dph) { + int oddpagebit = 1 << leaf_shift; + uint64_t vaddr2 = *vaddr ^ oddpagebit; + if (*vaddr & oddpagebit) { + *pw_entrylo1 = entry; + } else { + *pw_entrylo0 = entry; + } + if (get_physical_address(env, &paddr, &prot, vaddr2, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return 0; + } + if (!get_pte(env, vaddr2, leafentry_size, &entry)) { + return 0; + } + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + if (*vaddr & oddpagebit) { + *pw_entrylo0 = entry; + } else { + *pw_entrylo1 = entry; + } + } else { + return 0; + } + return 1; + } else { + *vaddr = entry; + return 2; + } +} + +static bool page_table_walk_refill(CPUMIPSState *env, vaddr address, int rw, + int mmu_idx) +{ + int gdw = (env->CP0_PWSize >> CP0PS_GDW) & 0x3F; + int udw = (env->CP0_PWSize >> CP0PS_UDW) & 0x3F; + int mdw = (env->CP0_PWSize >> CP0PS_MDW) & 0x3F; + int ptw = (env->CP0_PWSize >> CP0PS_PTW) & 0x3F; + int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F; + + /* Initial values */ + bool huge_page = false; + bool hgpg_bdhit = false; + bool hgpg_gdhit = false; + bool hgpg_udhit = false; + bool hgpg_mdhit = false; + + int32_t pw_pagemask = 0; + target_ulong pw_entryhi = 0; + uint64_t pw_entrylo0 = 0; + uint64_t pw_entrylo1 = 0; + + /* Native pointer size */ + /*For the 32-bit architectures, this bit is fixed to 0.*/ + int native_shift = 
(((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3; + + /* Indices from PWField */ + int pf_gdw = (env->CP0_PWField >> CP0PF_GDW) & 0x3F; + int pf_udw = (env->CP0_PWField >> CP0PF_UDW) & 0x3F; + int pf_mdw = (env->CP0_PWField >> CP0PF_MDW) & 0x3F; + int pf_ptw = (env->CP0_PWField >> CP0PF_PTW) & 0x3F; + int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + + /* Indices computed from faulting address */ + int gindex = (address >> pf_gdw) & ((1 << gdw) - 1); + int uindex = (address >> pf_udw) & ((1 << udw) - 1); + int mindex = (address >> pf_mdw) & ((1 << mdw) - 1); + int ptindex = (address >> pf_ptw) & ((1 << ptw) - 1); + + /* Other HTW configs */ + int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1; + + /* HTW Shift values (depend on entry size) */ + int directory_shift = (ptew > 1) ? -1 : + (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift; + int leaf_shift = (ptew > 1) ? -1 : + (ptew == 1) ? native_shift + 1 : native_shift; + + /* Offsets into tables */ + int goffset = gindex << directory_shift; + int uoffset = uindex << directory_shift; + int moffset = mindex << directory_shift; + int ptoffset0 = (ptindex >> 1) << (leaf_shift + 1); + int ptoffset1 = ptoffset0 | (1 << (leaf_shift)); + + uint32_t leafentry_size = 1 << (leaf_shift + 3); + + /* Starting address - Page Table Base */ + uint64_t vaddr = env->CP0_PWBase; + + uint64_t dir_entry; + uint64_t paddr; + int prot; + int m; + + if (!(env->CP0_Config3 & (1 << CP0C3_PW))) { + /* walker is unimplemented */ + return false; + } + if (!(env->CP0_PWCtl & (1 << CP0PC_PWEN))) { + /* walker is disabled */ + return false; + } + if (!(gdw > 0 || udw > 0 || mdw > 0)) { + /* no structure to walk */ + return false; + } + if ((directory_shift == -1) || (leaf_shift == -1)) { + return false; + } + + /* Global Directory */ + if (gdw > 0) { + vaddr |= goffset; + switch (walk_directory(env, &vaddr, pf_gdw, &huge_page, &hgpg_gdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Upper directory */ + if (udw > 0) { + vaddr |= uoffset; + switch (walk_directory(env, &vaddr, pf_udw, &huge_page, &hgpg_udhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Middle directory */ + if (mdw > 0) { + vaddr |= moffset; + switch (walk_directory(env, &vaddr, pf_mdw, &huge_page, &hgpg_mdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Leaf Level Page Table - First half of PTE pair */ + vaddr |= ptoffset0; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo0 = dir_entry; + + /* Leaf Level Page Table - Second half of PTE pair */ + vaddr |= ptoffset1; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo1 = dir_entry; + +refill: + + m = (1 << pf_ptw) - 1; + + if (huge_page) { + switch (hgpg_bdhit << 3 | hgpg_gdhit << 2 | hgpg_udhit << 1 | + hgpg_mdhit) + { + case 4: + m = (1 << pf_gdw) - 1; + 
if (pf_gdw & 1) { + m >>= 1; + } + break; + case 2: + m = (1 << pf_udw) - 1; + if (pf_udw & 1) { + m >>= 1; + } + break; + case 1: + m = (1 << pf_mdw) - 1; + if (pf_mdw & 1) { + m >>= 1; + } + break; + } + } + pw_pagemask = m >> 12; + update_pagemask(env, pw_pagemask << 13, &pw_pagemask); + pw_entryhi = (address & ~0x1fff) | (env->CP0_EntryHi & 0xFF); + { + target_ulong tmp_entryhi = env->CP0_EntryHi; + int32_t tmp_pagemask = env->CP0_PageMask; + uint64_t tmp_entrylo0 = env->CP0_EntryLo0; + uint64_t tmp_entrylo1 = env->CP0_EntryLo1; + + env->CP0_EntryHi = pw_entryhi; + env->CP0_PageMask = pw_pagemask; + env->CP0_EntryLo0 = pw_entrylo0; + env->CP0_EntryLo1 = pw_entrylo1; + + /* + * The hardware page walker inserts a page into the TLB in a manner + * identical to a TLBWR instruction as executed by the software refill + * handler. + */ + r4k_helper_tlbwr(env); + + env->CP0_EntryHi = tmp_entryhi; + env->CP0_PageMask = tmp_pagemask; + env->CP0_EntryLo0 = tmp_entrylo0; + env->CP0_EntryLo1 = tmp_entrylo1; + } + return true; +} +#endif +#endif + int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, int mmu_idx) { @@ -558,8 +894,7 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, /* data access */ #if !defined(CONFIG_USER_ONLY) - /* XXX: put correct access by using cpu_restore_state() - correctly */ + /* XXX: put correct access by using cpu_restore_state() correctly */ access_type = ACCESS_INT; ret = get_physical_address(env, &physical, &prot, address, rw, access_type, mmu_idx); @@ -583,6 +918,32 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, } else if (ret < 0) #endif { +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + if ((ret == TLBRET_NOMATCH) && (env->tlb->nb_tlb > 1)) { + /* + * Memory reads during hardware page table walking are performed + * as if they were kernel-mode load instructions. 
+ */ + int mode = (env->hflags & MIPS_HFLAG_KSU); + bool ret_walker; + env->hflags &= ~MIPS_HFLAG_KSU; + ret_walker = page_table_walk_refill(env, address, rw, mmu_idx); + env->hflags |= mode; + if (ret_walker) { + ret = get_physical_address(env, &physical, &prot, + address, rw, access_type, mmu_idx); + if (ret == TLBRET_MATCH) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot | PAGE_EXEC, + mmu_idx, TARGET_PAGE_SIZE); + ret = 0; + return ret; + } + } + } +#endif +#endif raise_mmu_exception(env, address, rw, ret); ret = 1; } diff --git a/target/mips/helper.h b/target/mips/helper.h index b2a780a6f2..c23e4e5d97 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -120,6 +120,8 @@ DEF_HELPER_2(mtc0_pagegrain, void, env, tl) DEF_HELPER_2(mtc0_segctl0, void, env, tl) DEF_HELPER_2(mtc0_segctl1, void, env, tl) DEF_HELPER_2(mtc0_segctl2, void, env, tl) +DEF_HELPER_2(mtc0_pwfield, void, env, tl) +DEF_HELPER_2(mtc0_pwsize, void, env, tl) DEF_HELPER_2(mtc0_wired, void, env, tl) DEF_HELPER_2(mtc0_srsconf0, void, env, tl) DEF_HELPER_2(mtc0_srsconf1, void, env, tl) @@ -127,6 +129,7 @@ DEF_HELPER_2(mtc0_srsconf2, void, env, tl) DEF_HELPER_2(mtc0_srsconf3, void, env, tl) DEF_HELPER_2(mtc0_srsconf4, void, env, tl) DEF_HELPER_2(mtc0_hwrena, void, env, tl) +DEF_HELPER_2(mtc0_pwctl, void, env, tl) DEF_HELPER_2(mtc0_count, void, env, tl) DEF_HELPER_2(mtc0_entryhi, void, env, tl) DEF_HELPER_2(mttc0_entryhi, void, env, tl) diff --git a/target/mips/internal.h b/target/mips/internal.h index e41051f8e6..8b1b2456af 100644 --- a/target/mips/internal.h +++ b/target/mips/internal.h @@ -59,7 +59,7 @@ struct mips_def_t { int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; target_ulong CP0_EBaseWG_rw_bitmask; - int insn_flags; + uint64_t insn_flags; enum mips_mmu_types mmu_type; }; @@ -211,6 +211,7 @@ uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; int ieee_ex_to_mips(int xcpt); +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) { @@ -306,9 +307,9 @@ static inline void compute_hflags(CPUMIPSState *env) { env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 | MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU | - MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 | - MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | MIPS_HFLAG_FRE | - MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); + MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3 | MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | + MIPS_HFLAG_FRE | MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); if (env->CP0_Status & (1 << CP0St_ERL)) { env->hflags |= MIPS_HFLAG_ERL; } @@ -355,16 +356,29 @@ static inline void compute_hflags(CPUMIPSState *env) (env->CP0_Config5 & (1 << CP0C5_SBRI))) { env->hflags |= MIPS_HFLAG_SBRI; } - if (env->insn_flags & ASE_DSPR2) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASER2, - so enable to access DSPR2 resources. */ + if (env->insn_flags & ASE_DSP_R3) { + /* + * Our cpu supports DSP R3 ASE, so enable + * access to DSP R3 resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { - env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2; + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3; + } + } else if (env->insn_flags & ASE_DSP_R2) { + /* + * Our cpu supports DSP R2 ASE, so enable + * access to DSP R2 resources. 
+ */ + if (env->CP0_Status & (1 << CP0St_MX)) { + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2; } } else if (env->insn_flags & ASE_DSP) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASE, - so enable to access DSP resources. */ + /* + * Our cpu supports DSP ASE, so enable + * access to DSP resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { env->hflags |= MIPS_HFLAG_DSP; } diff --git a/target/mips/machine.c b/target/mips/machine.c index 5ba78acd6d..70a8909b90 100644 --- a/target/mips/machine.c +++ b/target/mips/machine.c @@ -212,8 +212,8 @@ const VMStateDescription vmstate_tlb = { const VMStateDescription vmstate_mips_cpu = { .name = "cpu", - .version_id = 11, - .minimum_version_id = 11, + .version_id = 15, + .minimum_version_id = 15, .post_load = cpu_post_load, .fields = (VMStateField[]) { /* Active TC */ @@ -256,7 +256,11 @@ const VMStateDescription vmstate_mips_cpu = { VMSTATE_UINTTL(env.CP0_SegCtl0, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl1, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl2, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWBase, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWField, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWSize, MIPSCPU), VMSTATE_INT32(env.CP0_Wired, MIPSCPU), + VMSTATE_INT32(env.CP0_PWCtl, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf0, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf1, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf2, MIPSCPU), diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h index c8e99791ad..5177618615 100644 --- a/target/mips/mips-defs.h +++ b/target/mips/mips-defs.h @@ -22,40 +22,53 @@ #endif #endif -/* Masks used to mark instructions to indicate which ISA level they - were introduced in. */ -#define ISA_MIPS1 0x00000001 -#define ISA_MIPS2 0x00000002 -#define ISA_MIPS3 0x00000004 -#define ISA_MIPS4 0x00000008 -#define ISA_MIPS5 0x00000010 -#define ISA_MIPS32 0x00000020 -#define ISA_MIPS32R2 0x00000040 -#define ISA_MIPS64 0x00000080 -#define ISA_MIPS64R2 0x00000100 -#define ISA_MIPS32R3 0x00000200 -#define ISA_MIPS64R3 0x00000400 -#define ISA_MIPS32R5 0x00000800 -#define ISA_MIPS64R5 0x00001000 -#define ISA_MIPS32R6 0x00002000 -#define ISA_MIPS64R6 0x00004000 -#define ISA_NANOMIPS32 0x00008000 - -/* MIPS ASEs. */ -#define ASE_MIPS16 0x00010000 -#define ASE_MIPS3D 0x00020000 -#define ASE_MDMX 0x00040000 -#define ASE_DSP 0x00080000 -#define ASE_DSPR2 0x00100000 -#define ASE_MT 0x00200000 -#define ASE_SMARTMIPS 0x00400000 -#define ASE_MICROMIPS 0x00800000 -#define ASE_MSA 0x01000000 - -/* Chip specific instructions. 
*/ -#define INSN_LOONGSON2E 0x20000000 -#define INSN_LOONGSON2F 0x40000000 -#define INSN_VR54XX 0x80000000 +/* + * bit definitions for insn_flags (ISAs/ASEs flags) + * ------------------------------------------------ + */ +/* + * bits 0-31: MIPS base instruction sets + */ +#define ISA_MIPS1 0x0000000000000001ULL +#define ISA_MIPS2 0x0000000000000002ULL +#define ISA_MIPS3 0x0000000000000004ULL +#define ISA_MIPS4 0x0000000000000008ULL +#define ISA_MIPS5 0x0000000000000010ULL +#define ISA_MIPS32 0x0000000000000020ULL +#define ISA_MIPS32R2 0x0000000000000040ULL +#define ISA_MIPS64 0x0000000000000080ULL +#define ISA_MIPS64R2 0x0000000000000100ULL +#define ISA_MIPS32R3 0x0000000000000200ULL +#define ISA_MIPS64R3 0x0000000000000400ULL +#define ISA_MIPS32R5 0x0000000000000800ULL +#define ISA_MIPS64R5 0x0000000000001000ULL +#define ISA_MIPS32R6 0x0000000000002000ULL +#define ISA_MIPS64R6 0x0000000000004000ULL +#define ISA_NANOMIPS32 0x0000000000008000ULL +/* + * bits 32-47: MIPS ASEs + */ +#define ASE_MIPS16 0x0000000100000000ULL +#define ASE_MIPS3D 0x0000000200000000ULL +#define ASE_MDMX 0x0000000400000000ULL +#define ASE_DSP 0x0000000800000000ULL +#define ASE_DSP_R2 0x0000001000000000ULL +#define ASE_DSP_R3 0x0000002000000000ULL +#define ASE_MT 0x0000004000000000ULL +#define ASE_SMARTMIPS 0x0000008000000000ULL +#define ASE_MICROMIPS 0x0000010000000000ULL +#define ASE_MSA 0x0000020000000000ULL +/* + * bits 48-55: vendor-specific base instruction sets + */ +#define INSN_LOONGSON2E 0x0001000000000000ULL +#define INSN_LOONGSON2F 0x0002000000000000ULL +#define INSN_VR54XX 0x0004000000000000ULL +#define INSN_R5900 0x0008000000000000ULL +/* + * bits 56-63: vendor-specific ASEs + */ +#define ASE_MMI 0x0100000000000000ULL /* MIPS CPU defines. */ #define CPU_MIPS1 (ISA_MIPS1) @@ -63,6 +76,7 @@ #define CPU_MIPS3 (CPU_MIPS2 | ISA_MIPS3) #define CPU_MIPS4 (CPU_MIPS3 | ISA_MIPS4) #define CPU_VR54XX (CPU_MIPS4 | INSN_VR54XX) +#define CPU_R5900 (CPU_MIPS3 | INSN_R5900) #define CPU_LOONGSON2E (CPU_MIPS3 | INSN_LOONGSON2E) #define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F) diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c index c148b310cd..d1f1d1aa35 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -1400,7 +1400,7 @@ void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1) env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF); } -void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || @@ -1411,6 +1411,11 @@ void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) } } +void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +{ + update_pagemask(env, arg1, &env->CP0_PageMask); +} + void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1) { /* SmartMIPS not implemented */ @@ -1445,6 +1450,77 @@ void helper_mtc0_segctl2(CPUMIPSState *env, target_ulong arg1) tlb_flush(cs); } +void helper_mtc0_pwfield(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + uint64_t mask = 0x3F3FFFFFFFULL; + uint32_t old_ptei = (env->CP0_PWField >> CP0PF_PTEI) & 0x3FULL; + uint32_t new_ptei = (arg1 >> CP0PF_PTEI) & 0x3FULL; + + if ((env->insn_flags & ISA_MIPS32R6)) { + if (((arg1 >> CP0PF_BDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_BDI); + } + if (((arg1 >> CP0PF_GDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_GDI); + } + if (((arg1 >> 
CP0PF_UDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_UDI); + } + if (((arg1 >> CP0PF_MDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_MDI); + } + if (((arg1 >> CP0PF_PTI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_PTI); + } + } + env->CP0_PWField = arg1 & mask; + + if ((new_ptei >= 32) || + ((env->insn_flags & ISA_MIPS32R6) && + (new_ptei == 0 || new_ptei == 1))) { + env->CP0_PWField = (env->CP0_PWField & ~0x3FULL) | + (old_ptei << CP0PF_PTEI); + } +#else + uint32_t mask = 0x3FFFFFFF; + uint32_t old_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + uint32_t new_ptew = (arg1 >> CP0PF_PTEW) & 0x3F; + + if ((env->insn_flags & ISA_MIPS32R6)) { + if (((arg1 >> CP0PF_GDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_GDW); + } + if (((arg1 >> CP0PF_UDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_UDW); + } + if (((arg1 >> CP0PF_MDW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_MDW); + } + if (((arg1 >> CP0PF_PTW) & 0x3F) < 12) { + mask &= ~(0x3F << CP0PF_PTW); + } + } + env->CP0_PWField = arg1 & mask; + + if ((new_ptew >= 32) || + ((env->insn_flags & ISA_MIPS32R6) && + (new_ptew == 0 || new_ptew == 1))) { + env->CP0_PWField = (env->CP0_PWField & ~0x3F) | + (old_ptew << CP0PF_PTEW); + } +#endif +} + +void helper_mtc0_pwsize(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + env->CP0_PWSize = arg1 & 0x3F7FFFFFFFULL; +#else + env->CP0_PWSize = arg1 & 0x3FFFFFFF; +#endif +} + void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1) { if (env->insn_flags & ISA_MIPS32R6) { @@ -1456,6 +1532,16 @@ void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1) } } +void helper_mtc0_pwctl(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + /* PWEn = 0. Hardware page table walking is not implemented. */ + env->CP0_PWCtl = (env->CP0_PWCtl & 0x000000C0) | (arg1 & 0x5C00003F); +#else + env->CP0_PWCtl = (arg1 & 0x800000FF); +#endif +} + void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1) { env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask; diff --git a/target/mips/translate.c b/target/mips/translate.c index ab16cdb911..c44a751be9 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -1,5 +1,5 @@ /* - * MIPS32 emulation for qemu: main translation routines. 
+ * MIPS emulation for QEMU - main translation routines * * Copyright (c) 2004-2005 Jocelyn Mayer * Copyright (c) 2006 Marius Groeger (FPU operations) @@ -463,8 +463,10 @@ enum { OPC_WSBH = (0x02 << 6) | OPC_BSHFL, OPC_SEB = (0x10 << 6) | OPC_BSHFL, OPC_SEH = (0x18 << 6) | OPC_BSHFL, - OPC_ALIGN = (0x08 << 6) | OPC_BSHFL, /* 010.bp */ - OPC_ALIGN_END = (0x0B << 6) | OPC_BSHFL, /* 010.00 to 010.11 */ + OPC_ALIGN = (0x08 << 6) | OPC_BSHFL, /* 010.bp (010.00 to 010.11) */ + OPC_ALIGN_1 = (0x09 << 6) | OPC_BSHFL, + OPC_ALIGN_2 = (0x0A << 6) | OPC_BSHFL, + OPC_ALIGN_3 = (0x0B << 6) | OPC_BSHFL, OPC_BITSWAP = (0x00 << 6) | OPC_BSHFL /* 00000 */ }; @@ -474,8 +476,14 @@ enum { enum { OPC_DSBH = (0x02 << 6) | OPC_DBSHFL, OPC_DSHD = (0x05 << 6) | OPC_DBSHFL, - OPC_DALIGN = (0x08 << 6) | OPC_DBSHFL, /* 01.bp */ - OPC_DALIGN_END = (0x0F << 6) | OPC_DBSHFL, /* 01.000 to 01.111 */ + OPC_DALIGN = (0x08 << 6) | OPC_DBSHFL, /* 01.bp (01.000 to 01.111) */ + OPC_DALIGN_1 = (0x09 << 6) | OPC_DBSHFL, + OPC_DALIGN_2 = (0x0A << 6) | OPC_DBSHFL, + OPC_DALIGN_3 = (0x0B << 6) | OPC_DBSHFL, + OPC_DALIGN_4 = (0x0C << 6) | OPC_DBSHFL, + OPC_DALIGN_5 = (0x0D << 6) | OPC_DBSHFL, + OPC_DALIGN_6 = (0x0E << 6) | OPC_DBSHFL, + OPC_DALIGN_7 = (0x0F << 6) | OPC_DBSHFL, OPC_DBITSWAP = (0x00 << 6) | OPC_DBSHFL, /* 00000 */ }; @@ -1389,6 +1397,979 @@ enum { OPC_BINSRI_df = (0x7 << 23) | OPC_MSA_BIT_09, }; + +/* + * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET + * ============================================ + * + * MXU (full name: MIPS eXtension/enhanced Unit) is an SIMD extension of MIPS32 + * instructions set. It is designed to fit the needs of signal, graphical and + * video processing applications. MXU instruction set is used in Xburst family + * of microprocessors by Ingenic. + * + * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is + * the control register. 
+ * + * The notation used in MXU assembler mnemonics: + * + * XRa, XRb, XRc, XRd - MXU registers + * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers + * s12 - a subfield of an instruction code + * strd2 - a subfield of an instruction code + * eptn2 - a subfield of an instruction code + * eptn3 - a subfield of an instruction code + * optn2 - a subfield of an instruction code + * optn3 - a subfield of an instruction code + * sft4 - a subfield of an instruction code + * + * Load/Store instructions Multiplication instructions + * ----------------------- --------------------------- + * + * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt + * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt + * S32LDDV XRa, Rb, rc, strd2 S32SUB XRa, XRd, Rs, Rt + * S32STDV XRa, Rb, rc, strd2 S32SUBU XRa, XRd, Rs, Rt + * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt + * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt + * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 + * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 + * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 + * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 + * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 + * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 + * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd + * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd + * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 + * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 + * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 + * S16SDI XRa, Rb, s10, eptn2 + * S8LDD XRa, Rb, s8, eptn3 + * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions + * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- + * S8SDI XRa, Rb, s8, eptn3 + * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 + * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd + * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 + * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 + * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 + * S32CPS XRa, XRb, XRc + * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 + * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 + * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 + * D16ASUM XRa, XRb, XRc, XRd, eptn2 + * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, + * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc + * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc + * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 + * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 + * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 + * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc + * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd + * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc + * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc + * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd + * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd + * Q8SLT XRa, XRb, XRc + * Q8SLTU XRa, XRb, XRc + * Q8MOVZ XRa, XRb, XRc Shift instructions + * Q8MOVN XRa, XRb, XRc ------------------ + * + * D32SLL XRa, XRb, XRc, XRd, sft4 + * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 + * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 + * D32SARL XRa, XRb, XRc, sft4 + * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb + * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb + * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb + * S32OR XRa, XRb, XRc 
D32SARW XRa, XRb, XRc, Rb + * Q16SLL XRa, XRb, XRc, XRd, sft4 + * Q16SLR XRa, XRb, XRc, XRd, sft4 + * Miscelaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4 + * ------------------------- Q16SLLV XRa, XRb, Rb + * Q16SLRV XRa, XRb, Rb + * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb + * S32ALN XRa, XRb, XRc, Rb + * S32ALNI XRa, XRb, XRc, s3 + * S32LUI XRa, s8, optn3 Move instructions + * S32EXTR XRa, XRb, Rb, bits5 ----------------- + * S32EXTRV XRa, XRb, Rs, Rt + * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb + * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb + * + * + * bits + * 05..00 + * + * ┌─ 000000 ─ OPC_MXU_S32MADD + * ├─ 000001 ─ OPC_MXU_S32MADDU + * ├─ 000010 ─ <not assigned> + * │ 20..18 + * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX + * │ ├─ 001 ─ OPC_MXU_S32MIN + * │ ├─ 010 ─ OPC_MXU_D16MAX + * │ ├─ 011 ─ OPC_MXU_D16MIN + * │ ├─ 100 ─ OPC_MXU_Q8MAX + * │ ├─ 101 ─ OPC_MXU_Q8MIN + * │ ├─ 110 ─ OPC_MXU_Q8SLT + * │ └─ 111 ─ OPC_MXU_Q8SLTU + * ├─ 000100 ─ OPC_MXU_S32MSUB + * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18 + * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT + * │ ├─ 001 ─ OPC_MXU_D16SLT + * │ ├─ 010 ─ OPC_MXU_D16AVG + * │ ├─ 011 ─ OPC_MXU_D16AVGR + * │ ├─ 100 ─ OPC_MXU_Q8AVG + * │ ├─ 101 ─ OPC_MXU_Q8AVGR + * │ └─ 111 ─ OPC_MXU_Q8ADD + * │ + * │ 20..18 + * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS + * │ ├─ 010 ─ OPC_MXU_D16CPS + * │ ├─ 100 ─ OPC_MXU_Q8ABD + * │ └─ 110 ─ OPC_MXU_Q16SAT + * ├─ 001000 ─ OPC_MXU_D16MUL + * │ 25..24 + * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF + * │ └─ 01 ─ OPC_MXU_D16MULE + * ├─ 001010 ─ OPC_MXU_D16MAC + * ├─ 001011 ─ OPC_MXU_D16MACF + * ├─ 001100 ─ OPC_MXU_D16MADL + * │ 25..24 + * ├─ 001101 ─ OPC_MXU__POOL04 ─┬─ 00 ─ OPC_MXU_S16MAD + * │ └─ 01 ─ OPC_MXU_S16MAD_1 + * ├─ 001110 ─ OPC_MXU_Q16ADD + * ├─ 001111 ─ OPC_MXU_D16MACE + * │ 23 + * ├─ 010000 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32LDD + * │ └─ 1 ─ OPC_MXU_S32LDDR + * │ + * │ 23 + * ├─ 010001 ─ OPC_MXU__POOL06 ─┬─ 0 ─ OPC_MXU_S32STD + * │ └─ 1 ─ OPC_MXU_S32STDR + * │ + * │ 13..10 + * ├─ 010010 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32LDDV + * │ └─ 0001 ─ OPC_MXU_S32LDDVR + * │ + * │ 13..10 + * ├─ 010011 ─ OPC_MXU__POOL08 ─┬─ 0000 ─ OPC_MXU_S32STDV + * │ └─ 0001 ─ OPC_MXU_S32STDVR + * │ + * │ 23 + * ├─ 010100 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32LDI + * │ └─ 1 ─ OPC_MXU_S32LDIR + * │ + * │ 23 + * ├─ 010101 ─ OPC_MXU__POOL10 ─┬─ 0 ─ OPC_MXU_S32SDI + * │ └─ 1 ─ OPC_MXU_S32SDIR + * │ + * │ 13..10 + * ├─ 010110 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32LDIV + * │ └─ 0001 ─ OPC_MXU_S32LDIVR + * │ + * │ 13..10 + * ├─ 010111 ─ OPC_MXU__POOL12 ─┬─ 0000 ─ OPC_MXU_S32SDIV + * │ └─ 0001 ─ OPC_MXU_S32SDIVR + * ├─ 011000 ─ OPC_MXU_D32ADD + * │ 23..22 + * MXU ├─ 011001 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_D32ACC + * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM + * │ └─ 10 ─ OPC_MXU_D32ASUM + * ├─ 011010 ─ <not assigned> + * │ 23..22 + * ├─ 011011 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q16ACC + * │ ├─ 01 ─ OPC_MXU_Q16ACCM + * │ └─ 10 ─ OPC_MXU_Q16ASUM + * │ + * │ 23..22 + * ├─ 011100 ─ OPC_MXU__POOL15 ─┬─ 00 ─ OPC_MXU_Q8ADDE + * │ ├─ 01 ─ OPC_MXU_D8SUM + * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC + * ├─ 011110 ─ <not assigned> + * ├─ 011111 ─ <not assigned> + * ├─ 100000 ─ <not assigned> + * ├─ 100001 ─ <not assigned> + * ├─ 100010 ─ OPC_MXU_S8LDD + * ├─ 100011 ─ OPC_MXU_S8STD + * ├─ 100100 ─ OPC_MXU_S8LDI + * ├─ 100101 ─ OPC_MXU_S8SDI + * │ 15..14 + * ├─ 100110 ─ OPC_MXU__POOL16 ─┬─ 00 ─ OPC_MXU_S32MUL + * │ ├─ 00 ─ OPC_MXU_S32MULU + * │ ├─ 00 ─ OPC_MXU_S32EXTR + * │ └─ 00 ─ 
OPC_MXU_S32EXTRV + * │ + * │ 20..18 + * ├─ 100111 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_D32SARW + * │ ├─ 001 ─ OPC_MXU_S32ALN + * ├─ 101000 ─ OPC_MXU_LXB ├─ 010 ─ OPC_MXU_S32ALNI + * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_S32NOR + * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_S32AND + * ├─ 101011 ─ OPC_MXU_S16STD ├─ 101 ─ OPC_MXU_S32OR + * ├─ 101100 ─ OPC_MXU_S16LDI ├─ 110 ─ OPC_MXU_S32XOR + * ├─ 101101 ─ OPC_MXU_S16SDI └─ 111 ─ OPC_MXU_S32LUI + * ├─ 101000 ─ <not assigned> + * ├─ 101001 ─ <not assigned> + * ├─ 101010 ─ <not assigned> + * ├─ 101011 ─ <not assigned> + * ├─ 101100 ─ <not assigned> + * ├─ 101101 ─ <not assigned> + * ├─ 101110 ─ OPC_MXU_S32M2I + * ├─ 101111 ─ OPC_MXU_S32I2M + * ├─ 110000 ─ OPC_MXU_D32SLL + * ├─ 110001 ─ OPC_MXU_D32SLR + * ├─ 110010 ─ OPC_MXU_D32SARL + * ├─ 110011 ─ OPC_MXU_D32SAR + * ├─ 110100 ─ OPC_MXU_Q16SLL + * ├─ 110101 ─ OPC_MXU_Q16SLR 20..18 + * ├─ 110110 ─ OPC_MXU__POOL18 ─┬─ 000 ─ OPC_MXU_D32SLLV + * │ ├─ 001 ─ OPC_MXU_D32SLRV + * │ ├─ 010 ─ OPC_MXU_D32SARV + * │ ├─ 011 ─ OPC_MXU_Q16SLLV + * │ ├─ 100 ─ OPC_MXU_Q16SLRV + * │ └─ 101 ─ OPC_MXU_Q16SARV + * ├─ 110111 ─ OPC_MXU_Q16SAR + * │ 23..22 + * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL + * │ └─ 01 ─ OPC_MXU_Q8MULSU + * │ + * │ 20..18 + * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ + * │ ├─ 001 ─ OPC_MXU_Q8MOVN + * │ ├─ 010 ─ OPC_MXU_D16MOVZ + * │ ├─ 011 ─ OPC_MXU_D16MOVN + * │ ├─ 100 ─ OPC_MXU_S32MOVZ + * │ └─ 101 ─ OPC_MXU_S32MOV + * │ + * │ 23..22 + * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC + * │ └─ 10 ─ OPC_MXU_Q8MACSU + * ├─ 111011 ─ OPC_MXU_Q16SCOP + * ├─ 111100 ─ OPC_MXU_Q8MADL + * ├─ 111101 ─ OPC_MXU_S32SFL + * ├─ 111110 ─ OPC_MXU_Q8SAD + * └─ 111111 ─ <not assigned> + * + * + * Compiled after: + * + * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit + * Programming Manual", Ingenic Semiconductor Co, Ltd., 2017 + */ + +enum { + OPC_MXU_S32MADD = 0x00, + OPC_MXU_S32MADDU = 0x01, + /* not assigned 0x02 */ + OPC_MXU__POOL00 = 0x03, + OPC_MXU_S32MSUB = 0x04, + OPC_MXU_S32MSUBU = 0x05, + OPC_MXU__POOL01 = 0x06, + OPC_MXU__POOL02 = 0x07, + OPC_MXU_D16MUL = 0x08, + OPC_MXU__POOL03 = 0x09, + OPC_MXU_D16MAC = 0x0A, + OPC_MXU_D16MACF = 0x0B, + OPC_MXU_D16MADL = 0x0C, + OPC_MXU__POOL04 = 0x0D, + OPC_MXU_Q16ADD = 0x0E, + OPC_MXU_D16MACE = 0x0F, + OPC_MXU__POOL05 = 0x10, + OPC_MXU__POOL06 = 0x11, + OPC_MXU__POOL07 = 0x12, + OPC_MXU__POOL08 = 0x13, + OPC_MXU__POOL09 = 0x14, + OPC_MXU__POOL10 = 0x15, + OPC_MXU__POOL11 = 0x16, + OPC_MXU__POOL12 = 0x17, + OPC_MXU_D32ADD = 0x18, + OPC_MXU__POOL13 = 0x19, + /* not assigned 0x1A */ + OPC_MXU__POOL14 = 0x1B, + OPC_MXU__POOL15 = 0x1C, + OPC_MXU_Q8ACCE = 0x1D, + /* not assigned 0x1E */ + /* not assigned 0x1F */ + /* not assigned 0x20 */ + /* not assigned 0x21 */ + OPC_MXU_S8LDD = 0x22, + OPC_MXU_S8STD = 0x23, + OPC_MXU_S8LDI = 0x24, + OPC_MXU_S8SDI = 0x25, + OPC_MXU__POOL16 = 0x26, + OPC_MXU__POOL17 = 0x27, + OPC_MXU_LXB = 0x28, + /* not assigned 0x29 */ + OPC_MXU_S16LDD = 0x2A, + OPC_MXU_S16STD = 0x2B, + OPC_MXU_S16LDI = 0x2C, + OPC_MXU_S16SDI = 0x2D, + OPC_MXU_S32M2I = 0x2E, + OPC_MXU_S32I2M = 0x2F, + OPC_MXU_D32SLL = 0x30, + OPC_MXU_D32SLR = 0x31, + OPC_MXU_D32SARL = 0x32, + OPC_MXU_D32SAR = 0x33, + OPC_MXU_Q16SLL = 0x34, + OPC_MXU_Q16SLR = 0x35, + OPC_MXU__POOL18 = 0x36, + OPC_MXU_Q16SAR = 0x37, + OPC_MXU__POOL19 = 0x38, + OPC_MXU__POOL20 = 0x39, + OPC_MXU__POOL21 = 0x3A, + OPC_MXU_Q16SCOP = 0x3B, + OPC_MXU_Q8MADL = 0x3C, + OPC_MXU_S32SFL = 0x3D, + OPC_MXU_Q8SAD = 0x3E, + /* not assigned 0x3F */ +}; + + +/* + * 
MXU pool 00 + */ +enum { + OPC_MXU_S32MAX = 0x00, + OPC_MXU_S32MIN = 0x01, + OPC_MXU_D16MAX = 0x02, + OPC_MXU_D16MIN = 0x03, + OPC_MXU_Q8MAX = 0x04, + OPC_MXU_Q8MIN = 0x05, + OPC_MXU_Q8SLT = 0x06, + OPC_MXU_Q8SLTU = 0x07, +}; + +/* + * MXU pool 01 + */ +enum { + OPC_MXU_S32SLT = 0x00, + OPC_MXU_D16SLT = 0x01, + OPC_MXU_D16AVG = 0x02, + OPC_MXU_D16AVGR = 0x03, + OPC_MXU_Q8AVG = 0x04, + OPC_MXU_Q8AVGR = 0x05, + OPC_MXU_Q8ADD = 0x07, +}; + +/* + * MXU pool 02 + */ +enum { + OPC_MXU_S32CPS = 0x00, + OPC_MXU_D16CPS = 0x02, + OPC_MXU_Q8ABD = 0x04, + OPC_MXU_Q16SAT = 0x06, +}; + +/* + * MXU pool 03 + */ +enum { + OPC_MXU_D16MULF = 0x00, + OPC_MXU_D16MULE = 0x01, +}; + +/* + * MXU pool 04 + */ +enum { + OPC_MXU_S16MAD = 0x00, + OPC_MXU_S16MAD_1 = 0x01, +}; + +/* + * MXU pool 05 + */ +enum { + OPC_MXU_S32LDD = 0x00, + OPC_MXU_S32LDDR = 0x01, +}; + +/* + * MXU pool 06 + */ +enum { + OPC_MXU_S32STD = 0x00, + OPC_MXU_S32STDR = 0x01, +}; + +/* + * MXU pool 07 + */ +enum { + OPC_MXU_S32LDDV = 0x00, + OPC_MXU_S32LDDVR = 0x01, +}; + +/* + * MXU pool 08 + */ +enum { + OPC_MXU_S32STDV = 0x00, + OPC_MXU_S32STDVR = 0x01, +}; + +/* + * MXU pool 09 + */ +enum { + OPC_MXU_S32LDI = 0x00, + OPC_MXU_S32LDIR = 0x01, +}; + +/* + * MXU pool 10 + */ +enum { + OPC_MXU_S32SDI = 0x00, + OPC_MXU_S32SDIR = 0x01, +}; + +/* + * MXU pool 11 + */ +enum { + OPC_MXU_S32LDIV = 0x00, + OPC_MXU_S32LDIVR = 0x01, +}; + +/* + * MXU pool 12 + */ +enum { + OPC_MXU_S32SDIV = 0x00, + OPC_MXU_S32SDIVR = 0x01, +}; + +/* + * MXU pool 13 + */ +enum { + OPC_MXU_D32ACC = 0x00, + OPC_MXU_D32ACCM = 0x01, + OPC_MXU_D32ASUM = 0x02, +}; + +/* + * MXU pool 14 + */ +enum { + OPC_MXU_Q16ACC = 0x00, + OPC_MXU_Q16ACCM = 0x01, + OPC_MXU_Q16ASUM = 0x02, +}; + +/* + * MXU pool 15 + */ +enum { + OPC_MXU_Q8ADDE = 0x00, + OPC_MXU_D8SUM = 0x01, + OPC_MXU_D8SUMC = 0x02, +}; + +/* + * MXU pool 16 + */ +enum { + OPC_MXU_S32MUL = 0x00, + OPC_MXU_S32MULU = 0x01, + OPC_MXU_S32EXTR = 0x02, + OPC_MXU_S32EXTRV = 0x03, +}; + +/* + * MXU pool 17 + */ +enum { + OPC_MXU_D32SARW = 0x00, + OPC_MXU_S32ALN = 0x01, + OPC_MXU_S32ALNI = 0x02, + OPC_MXU_S32NOR = 0x03, + OPC_MXU_S32AND = 0x04, + OPC_MXU_S32OR = 0x05, + OPC_MXU_S32XOR = 0x06, + OPC_MXU_S32LUI = 0x07, +}; + +/* + * MXU pool 18 + */ +enum { + OPC_MXU_D32SLLV = 0x00, + OPC_MXU_D32SLRV = 0x01, + OPC_MXU_D32SARV = 0x03, + OPC_MXU_Q16SLLV = 0x04, + OPC_MXU_Q16SLRV = 0x05, + OPC_MXU_Q16SARV = 0x07, +}; + +/* + * MXU pool 19 + */ +enum { + OPC_MXU_Q8MUL = 0x00, + OPC_MXU_Q8MULSU = 0x01, +}; + +/* + * MXU pool 20 + */ +enum { + OPC_MXU_Q8MOVZ = 0x00, + OPC_MXU_Q8MOVN = 0x01, + OPC_MXU_D16MOVZ = 0x02, + OPC_MXU_D16MOVN = 0x03, + OPC_MXU_S32MOVZ = 0x04, + OPC_MXU_S32MOVN = 0x05, +}; + +/* + * MXU pool 21 + */ +enum { + OPC_MXU_Q8MAC = 0x00, + OPC_MXU_Q8MACSU = 0x01, +}; + +/* + * Overview of the TX79-specific instruction set + * ============================================= + * + * The R5900 and the C790 have 128-bit wide GPRs, where the upper 64 bits + * are only used by the specific quadword (128-bit) LQ/SQ load/store + * instructions and certain multimedia instructions (MMIs). These MMIs + * configure the 128-bit data path as two 64-bit, four 32-bit, eight 16-bit + * or sixteen 8-bit paths. 
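As a rough standalone illustration of that lane splitting (plain C, not part of the patch; the helper name is invented here), a PADDH-style modulo add over eight 16-bit lanes can be modelled on the two 64-bit halves of one 128-bit GPR:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Modulo parallel add of the four 16-bit lanes packed in one 64-bit half. */
static uint64_t paddh_half(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 4; i++) {
        uint16_t lane = (uint16_t)(a >> (16 * i)) + (uint16_t)(b >> (16 * i));
        r |= (uint64_t)lane << (16 * i);
    }
    return r;
}

int main(void)
{
    /* One 128-bit GPR modelled as { low 64 bits, high 64 bits }. */
    uint64_t rs[2] = { 0x0001000200030004ULL, 0x0005000600070008ULL };
    uint64_t rt[2] = { 0x0010002000300040ULL, 0x0050006000700080ULL };
    uint64_t rd[2] = { paddh_half(rs[0], rt[0]), paddh_half(rs[1], rt[1]) };

    printf("rd = %016" PRIx64 "%016" PRIx64 "\n", rd[1], rd[0]);
    return 0;
}

Each lane wraps independently; the saturating variants listed below (PADDSH, PADDUH and friends) differ only in clamping each lane's result instead of letting it wrap.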
+ * + * Reference: + * + * The Toshiba TX System RISC TX79 Core Architecture manual, + * https://wiki.qemu.org/File:C790.pdf + * + * Three-Operand Multiply and Multiply-Add (4 instructions) + * -------------------------------------------------------- + * MADD [rd,] rs, rt Multiply/Add + * MADDU [rd,] rs, rt Multiply/Add Unsigned + * MULT [rd,] rs, rt Multiply (3-operand) + * MULTU [rd,] rs, rt Multiply Unsigned (3-operand) + * + * Multiply Instructions for Pipeline 1 (10 instructions) + * ------------------------------------------------------ + * MULT1 [rd,] rs, rt Multiply Pipeline 1 + * MULTU1 [rd,] rs, rt Multiply Unsigned Pipeline 1 + * DIV1 rs, rt Divide Pipeline 1 + * DIVU1 rs, rt Divide Unsigned Pipeline 1 + * MADD1 [rd,] rs, rt Multiply-Add Pipeline 1 + * MADDU1 [rd,] rs, rt Multiply-Add Unsigned Pipeline 1 + * MFHI1 rd Move From HI1 Register + * MFLO1 rd Move From LO1 Register + * MTHI1 rs Move To HI1 Register + * MTLO1 rs Move To LO1 Register + * + * Arithmetic (19 instructions) + * ---------------------------- + * PADDB rd, rs, rt Parallel Add Byte + * PSUBB rd, rs, rt Parallel Subtract Byte + * PADDH rd, rs, rt Parallel Add Halfword + * PSUBH rd, rs, rt Parallel Subtract Halfword + * PADDW rd, rs, rt Parallel Add Word + * PSUBW rd, rs, rt Parallel Subtract Word + * PADSBH rd, rs, rt Parallel Add/Subtract Halfword + * PADDSB rd, rs, rt Parallel Add with Signed Saturation Byte + * PSUBSB rd, rs, rt Parallel Subtract with Signed Saturation Byte + * PADDSH rd, rs, rt Parallel Add with Signed Saturation Halfword + * PSUBSH rd, rs, rt Parallel Subtract with Signed Saturation Halfword + * PADDSW rd, rs, rt Parallel Add with Signed Saturation Word + * PSUBSW rd, rs, rt Parallel Subtract with Signed Saturation Word + * PADDUB rd, rs, rt Parallel Add with Unsigned saturation Byte + * PSUBUB rd, rs, rt Parallel Subtract with Unsigned saturation Byte + * PADDUH rd, rs, rt Parallel Add with Unsigned saturation Halfword + * PSUBUH rd, rs, rt Parallel Subtract with Unsigned saturation Halfword + * PADDUW rd, rs, rt Parallel Add with Unsigned saturation Word + * PSUBUW rd, rs, rt Parallel Subtract with Unsigned saturation Word + * + * Min/Max (4 instructions) + * ------------------------ + * PMAXH rd, rs, rt Parallel Maximum Halfword + * PMINH rd, rs, rt Parallel Minimum Halfword + * PMAXW rd, rs, rt Parallel Maximum Word + * PMINW rd, rs, rt Parallel Minimum Word + * + * Absolute (2 instructions) + * ------------------------- + * PABSH rd, rt Parallel Absolute Halfword + * PABSW rd, rt Parallel Absolute Word + * + * Logical (4 instructions) + * ------------------------ + * PAND rd, rs, rt Parallel AND + * POR rd, rs, rt Parallel OR + * PXOR rd, rs, rt Parallel XOR + * PNOR rd, rs, rt Parallel NOR + * + * Shift (9 instructions) + * ---------------------- + * PSLLH rd, rt, sa Parallel Shift Left Logical Halfword + * PSRLH rd, rt, sa Parallel Shift Right Logical Halfword + * PSRAH rd, rt, sa Parallel Shift Right Arithmetic Halfword + * PSLLW rd, rt, sa Parallel Shift Left Logical Word + * PSRLW rd, rt, sa Parallel Shift Right Logical Word + * PSRAW rd, rt, sa Parallel Shift Right Arithmetic Word + * PSLLVW rd, rt, rs Parallel Shift Left Logical Variable Word + * PSRLVW rd, rt, rs Parallel Shift Right Logical Variable Word + * PSRAVW rd, rt, rs Parallel Shift Right Arithmetic Variable Word + * + * Compare (6 instructions) + * ------------------------ + * PCGTB rd, rs, rt Parallel Compare for Greater Than Byte + * PCEQB rd, rs, rt Parallel Compare for Equal Byte + * PCGTH rd, rs, rt Parallel 
Compare for Greater Than Halfword + * PCEQH rd, rs, rt Parallel Compare for Equal Halfword + * PCGTW rd, rs, rt Parallel Compare for Greater Than Word + * PCEQW rd, rs, rt Parallel Compare for Equal Word + * + * LZC (1 instruction) + * ------------------- + * PLZCW rd, rs Parallel Leading Zero or One Count Word + * + * Quadword Load and Store (2 instructions) + * ---------------------------------------- + * LQ rt, offset(base) Load Quadword + * SQ rt, offset(base) Store Quadword + * + * Multiply and Divide (19 instructions) + * ------------------------------------- + * PMULTW rd, rs, rt Parallel Multiply Word + * PMULTUW rd, rs, rt Parallel Multiply Unsigned Word + * PDIVW rs, rt Parallel Divide Word + * PDIVUW rs, rt Parallel Divide Unsigned Word + * PMADDW rd, rs, rt Parallel Multiply-Add Word + * PMADDUW rd, rs, rt Parallel Multiply-Add Unsigned Word + * PMSUBW rd, rs, rt Parallel Multiply-Subtract Word + * PMULTH rd, rs, rt Parallel Multiply Halfword + * PMADDH rd, rs, rt Parallel Multiply-Add Halfword + * PMSUBH rd, rs, rt Parallel Multiply-Subtract Halfword + * PHMADH rd, rs, rt Parallel Horizontal Multiply-Add Halfword + * PHMSBH rd, rs, rt Parallel Horizontal Multiply-Subtract Halfword + * PDIVBW rs, rt Parallel Divide Broadcast Word + * PMFHI rd Parallel Move From HI Register + * PMFLO rd Parallel Move From LO Register + * PMTHI rs Parallel Move To HI Register + * PMTLO rs Parallel Move To LO Register + * PMFHL rd Parallel Move From HI/LO Register + * PMTHL rs Parallel Move To HI/LO Register + * + * Pack/Extend (11 instructions) + * ----------------------------- + * PPAC5 rd, rt Parallel Pack to 5 bits + * PPACB rd, rs, rt Parallel Pack to Byte + * PPACH rd, rs, rt Parallel Pack to Halfword + * PPACW rd, rs, rt Parallel Pack to Word + * PEXT5 rd, rt Parallel Extend Upper from 5 bits + * PEXTUB rd, rs, rt Parallel Extend Upper from Byte + * PEXTLB rd, rs, rt Parallel Extend Lower from Byte + * PEXTUH rd, rs, rt Parallel Extend Upper from Halfword + * PEXTLH rd, rs, rt Parallel Extend Lower from Halfword + * PEXTUW rd, rs, rt Parallel Extend Upper from Word + * PEXTLW rd, rs, rt Parallel Extend Lower from Word + * + * Others (16 instructions) + * ------------------------ + * PCPYH rd, rt Parallel Copy Halfword + * PCPYLD rd, rs, rt Parallel Copy Lower Doubleword + * PCPYUD rd, rs, rt Parallel Copy Upper Doubleword + * PREVH rd, rt Parallel Reverse Halfword + * PINTH rd, rs, rt Parallel Interleave Halfword + * PINTEH rd, rs, rt Parallel Interleave Even Halfword + * PEXEH rd, rt Parallel Exchange Even Halfword + * PEXCH rd, rt Parallel Exchange Center Halfword + * PEXEW rd, rt Parallel Exchange Even Word + * PEXCW rd, rt Parallel Exchange Center Word + * QFSRV rd, rs, rt Quadword Funnel Shift Right Variable + * MFSA rd Move from Shift Amount Register + * MTSA rs Move to Shift Amount Register + * MTSAB rs, immediate Move Byte Count to Shift Amount Register + * MTSAH rs, immediate Move Halfword Count to Shift Amount Register + * PROT3W rd, rt Parallel Rotate 3 Words + * + * The TX79-specific Multimedia Instruction encodings + * ================================================== + * + * TX79 Multimedia Instruction encoding table keys: + * + * * This code is reserved for future use. An attempt to execute it + * causes a Reserved Instruction exception. + * % This code indicates an instruction class. The instruction word + * must be further decoded by examining additional tables that show + * the values for other instruction fields. 
+ * # This code is reserved for the unsupported instructions DMULT, + * DMULTU, DDIV, DDIVU, LL, LLD, SC, SCD, LWC2 and SWC2. An attempt + * to execute it causes a Reserved Instruction exception. + * + * TX79 Multimedia Instructions encoded by opcode field (MMI, LQ, SQ): + * + * 31 26 0 + * +--------+----------------------------------------+ + * | opcode | | + * +--------+----------------------------------------+ + * + * opcode bits 28..26 + * bits | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 + * 31..29 | 000 | 001 | 010 | 011 | 100 | 101 | 110 | 111 + * -------+-------+-------+-------+-------+-------+-------+-------+------- + * 0 000 |SPECIAL| REGIMM| J | JAL | BEQ | BNE | BLEZ | BGTZ + * 1 001 | ADDI | ADDIU | SLTI | SLTIU | ANDI | ORI | XORI | LUI + * 2 010 | COP0 | COP1 | * | * | BEQL | BNEL | BLEZL | BGTZL + * 3 011 | DADDI | DADDIU| LDL | LDR | MMI% | * | LQ | SQ + * 4 100 | LB | LH | LWL | LW | LBU | LHU | LWR | LWU + * 5 101 | SB | SH | SWL | SW | SDL | SDR | SWR | CACHE + * 6 110 | # | LWC1 | # | PREF | # | LDC1 | # | LD + * 7 111 | # | SWC1 | # | * | # | SDC1 | # | SD + */ + +enum { + TX79_CLASS_MMI = 0x1C << 26, /* Same as OPC_SPECIAL2 */ + TX79_LQ = 0x1E << 26, /* Same as OPC_MSA */ + TX79_SQ = 0x1F << 26, /* Same as OPC_SPECIAL3 */ +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI: + * + * 31 26 5 0 + * +--------+-------------------------------+--------+ + * | MMI | |function| + * +--------+-------------------------------+--------+ + * + * function bits 2..0 + * bits | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 + * 5..3 | 000 | 001 | 010 | 011 | 100 | 101 | 110 | 111 + * -------+-------+-------+-------+-------+-------+-------+-------+------- + * 0 000 | MADD | MADDU | * | * | PLZCW | * | * | * + * 1 001 | MMI0% | MMI2% | * | * | * | * | * | * + * 2 010 | MFHI1 | MTHI1 | MFLO1 | MTLO1 | * | * | * | * + * 3 011 | MULT1 | MULTU1| DIV1 | DIVU1 | * | * | * | * + * 4 100 | MADD1 | MADDU1| * | * | * | * | * | * + * 5 101 | MMI1% | MMI3% | * | * | * | * | * | * + * 6 110 | PMFHL | PMTHL | * | * | PSLLH | * | PSRLH | PSRAH + * 7 111 | * | * | * | * | PSLLW | * | PSRLW | PSRAW + */ + +#define MASK_TX79_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F)) +enum { + TX79_MMI_MADD = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */ + TX79_MMI_MADDU = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */ + TX79_MMI_PLZCW = 0x04 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI0 = 0x08 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI2 = 0x09 | TX79_CLASS_MMI, + TX79_MMI_MFHI1 = 0x10 | TX79_CLASS_MMI, /* Same minor as OPC_MFHI */ + TX79_MMI_MTHI1 = 0x11 | TX79_CLASS_MMI, /* Same minor as OPC_MTHI */ + TX79_MMI_MFLO1 = 0x12 | TX79_CLASS_MMI, /* Same minor as OPC_MFLO */ + TX79_MMI_MTLO1 = 0x13 | TX79_CLASS_MMI, /* Same minor as OPC_MTLO */ + TX79_MMI_MULT1 = 0x18 | TX79_CLASS_MMI, /* Same minor as OPC_MULT */ + TX79_MMI_MULTU1 = 0x19 | TX79_CLASS_MMI, /* Same minor as OPC_MULTU */ + TX79_MMI_DIV1 = 0x1A | TX79_CLASS_MMI, /* Same minor as OPC_DIV */ + TX79_MMI_DIVU1 = 0x1B | TX79_CLASS_MMI, /* Same minor as OPC_DIVU */ + TX79_MMI_MADD1 = 0x20 | TX79_CLASS_MMI, + TX79_MMI_MADDU1 = 0x21 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI1 = 0x28 | TX79_CLASS_MMI, + TX79_MMI_CLASS_MMI3 = 0x29 | TX79_CLASS_MMI, + TX79_MMI_PMFHL = 0x30 | TX79_CLASS_MMI, + TX79_MMI_PMTHL = 0x31 | TX79_CLASS_MMI, + TX79_MMI_PSLLH = 0x34 | TX79_CLASS_MMI, + TX79_MMI_PSRLH = 0x36 | TX79_CLASS_MMI, + TX79_MMI_PSRAH = 0x37 | TX79_CLASS_MMI, + TX79_MMI_PSLLW = 0x3C | TX79_CLASS_MMI, + TX79_MMI_PSRLW = 0x3E | TX79_CLASS_MMI, + TX79_MMI_PSRAW = 0x3F | TX79_CLASS_MMI, +}; + +/* + * TX79 
Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI0: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI0 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | PADDW | PSUBW | PCGTW | PMAXW + * 1 001 | PADDH | PSUBH | PCGTH | PMAXH + * 2 010 | PADDB | PSUBB | PCGTB | * + * 3 011 | * | * | * | * + * 4 100 | PADDSW| PSUBSW| PEXTLW| PPACW + * 5 101 | PADDSH| PSUBSH| PEXTLH| PPACH + * 6 110 | PADDSB| PSUBSB| PEXTLB| PPACB + * 7 111 | * | * | PEXT5 | PPAC5 + */ + +#define MASK_TX79_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI0_PADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBW = (0x01 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTW = (0x02 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PMAXW = (0x03 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDH = (0x04 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBH = (0x05 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTH = (0x06 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PMAXH = (0x07 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDB = (0x08 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBB = (0x09 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PCGTB = (0x0A << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSW = (0x10 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSW = (0x11 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLW = (0x12 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACW = (0x13 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSH = (0x14 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSH = (0x15 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLH = (0x16 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACH = (0x17 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PADDSB = (0x18 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PSUBSB = (0x19 << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXTLB = (0x1A << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPACB = (0x1B << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PEXT5 = (0x1E << 6) | TX79_MMI_CLASS_MMI0, + TX79_MMI0_PPAC5 = (0x1F << 6) | TX79_MMI_CLASS_MMI0, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI1: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI1 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | * | PABSW | PCEQW | PMINW + * 1 001 | PADSBH| PABSH | PCEQH | PMINH + * 2 010 | * | * | PCEQB | * + * 3 011 | * | * | * | * + * 4 100 | PADDUW| PSUBUW| PEXTUW| * + * 5 101 | PADDUH| PSUBUH| PEXTUH| * + * 6 110 | PADDUB| PSUBUB| PEXTUB| QFSRV + * 7 111 | * | * | * | * + */ + +#define MASK_TX79_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI1_PABSW = (0x01 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQW = (0x02 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PMINW = (0x03 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADSBH = (0x04 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PABSH = (0x05 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQH = (0x06 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PMINH = (0x07 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PCEQB = (0x0A << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUW = (0x10 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PSUBUW = (0x11 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUW = (0x12 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUH = (0x14 << 6) | TX79_MMI_CLASS_MMI1, + 
TX79_MMI1_PSUBUH = (0x15 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUH = (0x16 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PADDUB = (0x18 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PSUBUB = (0x19 << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_PEXTUB = (0x1A << 6) | TX79_MMI_CLASS_MMI1, + TX79_MMI1_QFSRV = (0x1B << 6) | TX79_MMI_CLASS_MMI1, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI2: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI2 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 | PMADDW| * | PSLLVW| PSRLVW + * 1 001 | PMSUBW| * | * | * + * 2 010 | PMFHI | PMFLO | PINTH | * + * 3 011 | PMULTW| PDIVW | PCPYLD| * + * 4 100 | PMADDH| PHMADH| PAND | PXOR + * 5 101 | PMSUBH| PHMSBH| * | * + * 6 110 | * | * | PEXEH | PREVH + * 7 111 | PMULTH| PDIVBW| PEXEW | PROT3W + */ + +#define MASK_TX79_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI2_PMADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PSLLVW = (0x02 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PSRLVW = (0x03 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMSUBW = (0x04 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMFHI = (0x08 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMFLO = (0x09 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PINTH = (0x0A << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMULTW = (0x0C << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PDIVW = (0x0D << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PCPYLD = (0x0E << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMADDH = (0x10 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PHMADH = (0x11 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PAND = (0x12 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PXOR = (0x13 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMSUBH = (0x14 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PHMSBH = (0x15 << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PEXEH = (0x1A << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PREVH = (0x1B << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PMULTH = (0x1C << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PDIVBW = (0x1D << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PEXEW = (0x1E << 6) | TX79_MMI_CLASS_MMI2, + TX79_MMI2_PROT3W = (0x1F << 6) | TX79_MMI_CLASS_MMI2, +}; + +/* + * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI3: + * + * 31 26 10 6 5 0 + * +--------+----------------------+--------+--------+ + * | MMI | |function| MMI3 | + * +--------+----------------------+--------+--------+ + * + * function bits 7..6 + * bits | 0 | 1 | 2 | 3 + * 10..8 | 00 | 01 | 10 | 11 + * -------+-------+-------+-------+------- + * 0 000 |PMADDUW| * | * | PSRAVW + * 1 001 | * | * | * | * + * 2 010 | PMTHI | PMTLO | PINTEH| * + * 3 011 |PMULTUW| PDIVUW| PCPYUD| * + * 4 100 | * | * | POR | PNOR + * 5 101 | * | * | * | * + * 6 110 | * | * | PEXCH | PCPYH + * 7 111 | * | * | PEXCW | * + */ + +#define MASK_TX79_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF)) +enum { + TX79_MMI3_PMADDUW = (0x00 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PSRAVW = (0x03 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMTHI = (0x08 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMTLO = (0x09 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PINTEH = (0x0A << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PMULTUW = (0x0C << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PDIVUW = (0x0D << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PCPYUD = (0x0E << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_POR = (0x12 << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PNOR = (0x13 
<< 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PEXCH = (0x1A << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PCPYH = (0x1B << 6) | TX79_MMI_CLASS_MMI3, + TX79_MMI3_PEXCW = (0x1E << 6) | TX79_MMI_CLASS_MMI3, +}; + /* global register indices */ static TCGv cpu_gpr[32], cpu_PC; static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC]; @@ -1447,8 +2428,9 @@ typedef struct DisasContext { target_ulong saved_pc; target_ulong page_start; uint32_t opcode; - int insn_flags; + uint64_t insn_flags; int32_t CP0_Config1; + int32_t CP0_Config2; int32_t CP0_Config3; int32_t CP0_Config5; /* Routine used to access memory */ @@ -1857,9 +2839,20 @@ static inline void check_dsp(DisasContext *ctx) } } -static inline void check_dspr2(DisasContext *ctx) +static inline void check_dsp_r2(DisasContext *ctx) { - if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) { + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R2))) { + if (ctx->insn_flags & ASE_DSP) { + generate_exception_end(ctx, EXCP_DSPDIS); + } else { + generate_exception_end(ctx, EXCP_RI); + } + } +} + +static inline void check_dsp_r3(DisasContext *ctx) +{ + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R3))) { if (ctx->insn_flags & ASE_DSP) { generate_exception_end(ctx, EXCP_DSPDIS); } else { @@ -1870,7 +2863,7 @@ static inline void check_dspr2(DisasContext *ctx) /* This code generates a "reserved instruction" exception if the CPU does not support the instruction set corresponding to flags. */ -static inline void check_insn(DisasContext *ctx, int flags) +static inline void check_insn(DisasContext *ctx, uint64_t flags) { if (unlikely(!(ctx->insn_flags & flags))) { generate_exception_end(ctx, EXCP_RI); @@ -1880,13 +2873,28 @@ static inline void check_insn(DisasContext *ctx, int flags) /* This code generates a "reserved instruction" exception if the CPU has corresponding flag set which indicates that the instruction has been removed. */ -static inline void check_insn_opc_removed(DisasContext *ctx, int flags) +static inline void check_insn_opc_removed(DisasContext *ctx, uint64_t flags) { if (unlikely(ctx->insn_flags & flags)) { generate_exception_end(ctx, EXCP_RI); } } +/* + * The Linux kernel traps certain reserved instruction exceptions to + * emulate the corresponding instructions. QEMU is the kernel in user + * mode, so those traps are emulated by accepting the instructions. + * + * A reserved instruction exception is generated for flagged CPUs if + * QEMU runs in system mode. + */ +static inline void check_insn_opc_user_only(DisasContext *ctx, uint64_t flags) +{ +#ifndef CONFIG_USER_ONLY + check_insn_opc_removed(ctx, flags); +#endif +} + /* This code generates a "reserved instruction" exception if the CPU does not support 64-bit paired-single (PS) floating point data type */ static inline void check_ps(DisasContext *ctx) @@ -1927,6 +2935,19 @@ static inline void check_xnp(DisasContext *ctx) } } +#ifndef CONFIG_USER_ONLY +/* + * This code generates a "reserved instruction" exception if the + * Config3 PW bit is NOT set. + */ +static inline void check_pw(DisasContext *ctx) +{ + if (unlikely(!(ctx->CP0_Config3 & (1 << CP0C3_PW)))) { + generate_exception_end(ctx, EXCP_RI); + } +} +#endif + /* * This code generates a "reserved instruction" exception if the * Config3 MT bit is NOT set. 
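The MASK_TX79_MMI*() macros above implement the two-level split shown in the MMI encoding tables: the major opcode sits in bits 31..26, the MMI0..MMI3 class in bits 5..0, and the operation in bits 10..6 (the macros keep the extracted fields in their original bit positions). A short standalone sketch (the function name here is invented, not from the patch) decodes a PADDH word along those lines:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void classify_mmi(uint32_t insn)
{
    uint32_t major = insn >> 26;         /* bits 31..26: MMI = 0b011100   */
    uint32_t func  = insn & 0x3F;        /* bits  5..0:  MMI0..MMI3 class */
    uint32_t sub   = (insn >> 6) & 0x1F; /* bits 10..6:  operation        */

    if (major != 0x1C) {
        printf("not an MMI-class instruction\n");
        return;
    }
    printf("MMI function 0x%02" PRIx32 ", subfunction 0x%02" PRIx32 "\n",
           func, sub);
}

int main(void)
{
    /* PADDH: major opcode = MMI, function = MMI0 (0x08), subfunction = 0x04 */
    uint32_t insn = (0x1CU << 26) | (0x04U << 6) | 0x08U;

    classify_mmi(insn);
    return 0;
}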
@@ -3231,17 +4252,21 @@ static void gen_shift(DisasContext *ctx, uint32_t opc, /* Arithmetic on HI/LO registers */ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) { - if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) { + if (reg == 0 && (opc == OPC_MFHI || opc == TX79_MMI_MFHI1 || + opc == OPC_MFLO || opc == TX79_MMI_MFLO1)) { /* Treat as NOP. */ return; } if (acc != 0) { - check_dsp(ctx); + if (!(ctx->insn_flags & INSN_R5900)) { + check_dsp(ctx); + } } switch (opc) { case OPC_MFHI: + case TX79_MMI_MFHI1: #if defined(TARGET_MIPS64) if (acc != 0) { tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]); @@ -3252,6 +4277,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MFLO: + case TX79_MMI_MFLO1: #if defined(TARGET_MIPS64) if (acc != 0) { tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]); @@ -3262,6 +4288,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MTHI: + case TX79_MMI_MTHI1: if (reg != 0) { #if defined(TARGET_MIPS64) if (acc != 0) { @@ -3276,6 +4303,7 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg) } break; case OPC_MTLO: + case TX79_MMI_MTLO1: if (reg != 0) { #if defined(TARGET_MIPS64) if (acc != 0) { @@ -3588,11 +4616,14 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, gen_load_gpr(t1, rt); if (acc != 0) { - check_dsp(ctx); + if (!(ctx->insn_flags & INSN_R5900)) { + check_dsp(ctx); + } } switch (opc) { case OPC_DIV: + case TX79_MMI_DIV1: { TCGv t2 = tcg_temp_new(); TCGv t3 = tcg_temp_new(); @@ -3614,6 +4645,7 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, } break; case OPC_DIVU: + case TX79_MMI_DIVU1: { TCGv t2 = tcg_const_tl(0); TCGv t3 = tcg_const_tl(1); @@ -3768,6 +4800,84 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc, tcg_temp_free(t1); } +/* + * These MULT and MULTU instructions implemented in for example the + * Toshiba/Sony R5900 and the Toshiba TX19, TX39 and TX79 core + * architectures are special three-operand variants with the syntax + * + * MULT[U][1] rd, rs, rt + * + * such that + * + * (rd, LO, HI) <- rs * rt + * + * where the low-order 32-bits of the result is placed into both the + * GPR rd and the special register LO. The high-order 32-bits of the + * result is placed into the special register HI. + * + * If the GPR rd is omitted in assembly language, it is taken to be 0, + * which is the zero register that always reads as 0. 
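As a minimal standalone sketch of these semantics (plain C, not the TCG code; it mirrors what gen_mul_txx9() below emits), the signed 64-bit product is split so that rd and LO receive the sign-extended low half and HI the sign-extended high half:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void mult_3op(int32_t rs, int32_t rt,
                     int64_t *rd, int64_t *lo, int64_t *hi)
{
    int64_t prod = (int64_t)rs * rt;

    *rd = (int32_t)prod;         /* low 32 bits, sign-extended, also -> LO */
    *lo = (int32_t)prod;
    *hi = (int32_t)(prod >> 32); /* high 32 bits, sign-extended */
}

int main(void)
{
    int64_t rd, lo, hi;

    mult_3op(0x40000000, 8, &rd, &lo, &hi);
    printf("rd=%" PRId64 " lo=%" PRId64 " hi=%" PRId64 "\n", rd, lo, hi);
    return 0;
}

gen_mul_txx9() below produces the same split with tcg_gen_muls2_i32()/tcg_gen_mulu2_i32(), and only writes the GPR when rd is non-zero.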
+ */ +static void gen_mul_txx9(DisasContext *ctx, uint32_t opc, + int rd, int rs, int rt) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + int acc = 0; + + gen_load_gpr(t0, rs); + gen_load_gpr(t1, rt); + + switch (opc) { + case TX79_MMI_MULT1: + acc = 1; + /* Fall through */ + case OPC_MULT: + { + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t2, t0); + tcg_gen_trunc_tl_i32(t3, t1); + tcg_gen_muls2_i32(t2, t3, t2, t3); + if (rd) { + tcg_gen_ext_i32_tl(cpu_gpr[rd], t2); + } + tcg_gen_ext_i32_tl(cpu_LO[acc], t2); + tcg_gen_ext_i32_tl(cpu_HI[acc], t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } + break; + case TX79_MMI_MULTU1: + acc = 1; + /* Fall through */ + case OPC_MULTU: + { + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t2, t0); + tcg_gen_trunc_tl_i32(t3, t1); + tcg_gen_mulu2_i32(t2, t3, t2, t3); + if (rd) { + tcg_gen_ext_i32_tl(cpu_gpr[rd], t2); + } + tcg_gen_ext_i32_tl(cpu_LO[acc], t2); + tcg_gen_ext_i32_tl(cpu_HI[acc], t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } + break; + default: + MIPS_INVAL("mul TXx9"); + generate_exception_end(ctx, EXCP_RI); + goto out; + } + + out: + tcg_temp_free(t0); + tcg_temp_free(t1); +} + static void gen_mul_vr54xx (DisasContext *ctx, uint32_t opc, int rd, int rs, int rt) { @@ -5537,6 +6647,21 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ext32s_tl(arg, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -5572,6 +6697,11 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6238,6 +7368,21 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -6273,6 +7418,11 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6948,6 +8098,21 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_SegCtl2)); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ 
-6983,6 +8148,11 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -7631,6 +8801,21 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -7666,6 +8851,11 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -14999,15 +16189,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) case 0x38: /* cmovs */ switch ((ctx->opcode >> 6) & 0x7) { - case MOVN_FMT: /* SELNEZ_FMT */ + case MOVN_FMT: /* SELEQZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELNEZ_FMT */ + /* SELEQZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -15021,15 +16211,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) check_insn_opc_removed(ctx, ISA_MIPS32R6); FINSN_3ARG_SDPS(MOVN); break; - case MOVZ_FMT: /* SELEQZ_FMT */ + case MOVZ_FMT: /* SELNEZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELEQZ_FMT */ + /* SELNEZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -16488,6 +17678,40 @@ enum { NM_P_SC = 0x0b, }; +/* P.LS.E0 instruction pool */ +enum { + NM_LBE = 0x00, + NM_SBE = 0x01, + NM_LBUE = 0x02, + NM_P_PREFE = 0x03, + NM_LHE = 0x04, + NM_SHE = 0x05, + NM_LHUE = 0x06, + NM_CACHEE = 0x07, + NM_LWE = 0x08, + NM_SWE = 0x09, + NM_P_LLE = 0x0a, + NM_P_SCE = 0x0b, +}; + +/* P.PREFE instruction pool */ +enum { + NM_SYNCIE = 0x00, + NM_PREFE = 0x01, +}; + +/* P.LLE instruction pool */ +enum { + NM_LLE = 0x00, + NM_LLWPE = 0x01, +}; + +/* P.SCE instruction pool */ +enum { + NM_SCE = 0x00, + NM_SCWPE = 0x01, +}; + /* P.LS.WM instruction pool */ enum { NM_LWM = 0x00, @@ -17444,7 +18668,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, case NM_POOL32AXF_2_0_7: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPAQ_S_W_PH: @@ -17452,7 +18676,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpaq_s_w_ph(t0, v1, v0, cpu_env); break; case NM_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSQ_S_W_PH: @@ -17467,7 +18691,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, 
uint32_t opc, case NM_POOL32AXF_2_8_15: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v0, v1, cpu_env); break; case NM_DPAQ_SA_L_W: @@ -17475,7 +18699,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpaq_sa_l_w(t0, v0, v1, cpu_env); break; case NM_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSQ_SA_L_W: @@ -17494,7 +18718,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSU_H_QBL: @@ -17502,11 +18726,11 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v0, v1, cpu_env); break; default: @@ -17521,7 +18745,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSU_H_QBR: @@ -17529,7 +18753,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_MULSAQ_S_W_PH: @@ -17571,7 +18795,7 @@ static void gen_pool32axf_2_nanomips_insn(DisasContext *ctx, uint32_t opc, gen_pool32axf_2_multiply(ctx, opc, v0_t, v1_t, rd); break; case NM_BALIGN: - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt != 0) { gen_load_gpr(t0, rs); rd &= 3; @@ -17801,7 +19025,7 @@ static void gen_pool32axf_4_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(v0_t, v0_t, cpu_env); gen_store_gpr(v0_t, ret); break; @@ -17940,7 +19164,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 2); switch (extract32(ctx->opcode, 12, 1)) { case 0: @@ -17956,7 +19180,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, } break; case NM_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 1); gen_helper_shrl_ph(t0, t0, rs_t); gen_store_gpr(t0, rt); @@ -18881,19 +20105,19 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_LE_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); @@ -18949,7 +20173,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case 
NM_ADDQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_PH */ @@ -18964,7 +20188,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_W */ @@ -18994,7 +20218,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDU_PH */ @@ -19009,7 +20233,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDUH_QB */ @@ -19039,7 +20263,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SHRAV_QB */ @@ -19069,7 +20293,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_PH */ @@ -19084,7 +20308,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_W */ @@ -19114,7 +20338,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBU_PH */ @@ -19129,7 +20353,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBUH_QB */ @@ -19159,7 +20383,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* PRECR_SRA_PH_W */ @@ -19199,22 +20423,22 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_RS_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_rs_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_APPEND: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_load_gpr(t0, rs); if (rd != 0) { tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], rd, 32 - rd); @@ -19232,7 +20456,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(v1_t, v1_t, v2_t); gen_store_gpr(v1_t, ret); break; @@ -19274,7 +20498,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MUL_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* MUL_PH */ @@ -19289,7 +20513,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(v1_t, v1_t, 
v2_t); gen_store_gpr(v1_t, ret); break; @@ -19326,8 +20550,8 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, case 0: /* SHRA_PH */ gen_helper_shra_ph(v1_t, t0, v1_t); - break; gen_store_gpr(v1_t, rt); + break; case 1: /* SHRA_R_PH */ gen_helper_shra_r_ph(v1_t, t0, v1_t); @@ -20098,7 +21322,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) gen_compute_branch_cp1_nm(ctx, OPC_BC1NEZ, rt, s); break; case NM_BPOSGE32C: - check_dspr2(ctx); + check_dsp_r3(ctx); { int32_t imm = extract32(ctx->opcode, 1, 13) | extract32(ctx->opcode, 0, 1) << 13; @@ -20607,7 +21831,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, switch (op1) { /* OPC_MULT_G_2E is equal OPC_ADDUH_QB_DSP */ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_ADDUH_QB: gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t); @@ -20650,7 +21874,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_ABSQ_S_PH_DSP: switch (op2) { case OPC_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PH: @@ -20729,11 +21953,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBQ_PH: @@ -20757,11 +21981,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDSC: @@ -20785,7 +22009,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_QB_DSP: switch (op2) { case OPC_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECRQ_QB_PH: @@ -20793,7 +22017,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20802,7 +22026,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20884,7 +22108,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t); break; case OPC_ABSQ_S_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PW: @@ -20928,19 +22152,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); 
gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SUBUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDQ_PW: @@ -20968,19 +22192,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_r_ob(cpu_gpr[ret], v1_t, v2_t); break; } @@ -20988,11 +22212,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_OB_DSP: switch (op2) { case OPC_PRECR_OB_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 ret_t = tcg_const_i32(ret); gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t); @@ -21000,7 +22224,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_v = tcg_const_i32(ret); gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v); @@ -21103,27 +22327,27 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRAV_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_PH: @@ -21202,19 +22426,19 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env); break; case OPC_SHRA_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_PW: @@ -21258,11 +22482,11 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRL_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRLV_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t); break; default: /* Invalid */ @@ -21303,7 +22527,7 @@ static void 
gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. */ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_MUL_PH: gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); @@ -21338,11 +22562,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env); break; case OPC_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQ_S_W_PH: @@ -21350,19 +22574,19 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQ_S_W_PH: @@ -21370,11 +22594,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSAQ_S_W_PH: @@ -21406,7 +22630,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env); break; } @@ -21435,7 +22659,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env); break; case OPC_DPA_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPAQ_S_W_QH: @@ -21455,7 +22679,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env); break; case OPC_DPS_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPSQ_S_W_QH: @@ -21549,7 +22773,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; } @@ -21773,7 +22997,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmpgu_le_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21781,7 +23005,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], 
t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21789,7 +23013,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LE_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21850,15 +23074,15 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmp_le_qh(v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_EQ_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LT_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LE_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_le_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGU_EQ_OB: @@ -21916,7 +23140,7 @@ static void gen_mipsdsp_append(CPUMIPSState *env, DisasContext *ctx, { TCGv t0; - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt == 0) { /* Treat as NOP. */ @@ -22351,7 +23575,7 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MOVN: /* Conditional move */ case OPC_MOVZ: check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 | - INSN_LOONGSON2E | INSN_LOONGSON2F); + INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900); gen_cond_move(ctx, op1, rd, rs, rt); break; case OPC_MFHI: /* Move from HI/LO */ @@ -22378,6 +23602,8 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) check_insn(ctx, INSN_VR54XX); op1 = MASK_MUL_VR54XX(ctx->opcode); gen_mul_vr54xx(ctx, op1, rd, rs, rt); + } else if (ctx->insn_flags & INSN_R5900) { + gen_mul_txx9(ctx, op1, rd, rs, rt); } else { gen_muldiv(ctx, op1, rd & 3, rs, rt); } @@ -22392,6 +23618,7 @@ static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_DDIV: case OPC_DDIVU: check_insn(ctx, ISA_MIPS3); + check_insn_opc_user_only(ctx, INSN_R5900); check_mips_64(ctx); gen_muldiv(ctx, op1, 0, rs, rt); break; @@ -22738,7 +23965,9 @@ static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_BSHFL(ctx->opcode); switch (op2) { case OPC_ALIGN: - case OPC_ALIGN_END: + case OPC_ALIGN_1: + case OPC_ALIGN_2: + case OPC_ALIGN_3: gen_align(ctx, 32, rd, rs, rt, sa & 3); break; case OPC_BITSWAP: @@ -22764,7 +23993,13 @@ static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) op2 = MASK_DBSHFL(ctx->opcode); switch (op2) { case OPC_DALIGN: - case OPC_DALIGN_END: + case OPC_DALIGN_1: + case OPC_DALIGN_2: + case OPC_DALIGN_3: + case OPC_DALIGN_4: + case OPC_DALIGN_5: + case OPC_DALIGN_6: + case OPC_DALIGN_7: gen_align(ctx, 64, rd, rs, rt, sa & 7); break; case OPC_DBITSWAP: @@ -22801,7 +24036,7 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2E: /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. 
*/ - if ((ctx->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) { + if ((ctx->insn_flags & ASE_DSP_R2) && (op1 == OPC_MULT_G_2E)) { op2 = MASK_ADDUH_QB(ctx->opcode); switch (op2) { case OPC_ADDUH_QB: @@ -23308,6 +24543,250 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) } } +static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI0(ctx->opcode); + + switch (opc) { + case TX79_MMI0_PADDW: /* TODO: TX79_MMI0_PADDW */ + case TX79_MMI0_PSUBW: /* TODO: TX79_MMI0_PSUBW */ + case TX79_MMI0_PCGTW: /* TODO: TX79_MMI0_PCGTW */ + case TX79_MMI0_PMAXW: /* TODO: TX79_MMI0_PMAXW */ + case TX79_MMI0_PADDH: /* TODO: TX79_MMI0_PADDH */ + case TX79_MMI0_PSUBH: /* TODO: TX79_MMI0_PSUBH */ + case TX79_MMI0_PCGTH: /* TODO: TX79_MMI0_PCGTH */ + case TX79_MMI0_PMAXH: /* TODO: TX79_MMI0_PMAXH */ + case TX79_MMI0_PADDB: /* TODO: TX79_MMI0_PADDB */ + case TX79_MMI0_PSUBB: /* TODO: TX79_MMI0_PSUBB */ + case TX79_MMI0_PCGTB: /* TODO: TX79_MMI0_PCGTB */ + case TX79_MMI0_PADDSW: /* TODO: TX79_MMI0_PADDSW */ + case TX79_MMI0_PSUBSW: /* TODO: TX79_MMI0_PSUBSW */ + case TX79_MMI0_PEXTLW: /* TODO: TX79_MMI0_PEXTLW */ + case TX79_MMI0_PPACW: /* TODO: TX79_MMI0_PPACW */ + case TX79_MMI0_PADDSH: /* TODO: TX79_MMI0_PADDSH */ + case TX79_MMI0_PSUBSH: /* TODO: TX79_MMI0_PSUBSH */ + case TX79_MMI0_PEXTLH: /* TODO: TX79_MMI0_PEXTLH */ + case TX79_MMI0_PPACH: /* TODO: TX79_MMI0_PPACH */ + case TX79_MMI0_PADDSB: /* TODO: TX79_MMI0_PADDSB */ + case TX79_MMI0_PSUBSB: /* TODO: TX79_MMI0_PSUBSB */ + case TX79_MMI0_PEXTLB: /* TODO: TX79_MMI0_PEXTLB */ + case TX79_MMI0_PPACB: /* TODO: TX79_MMI0_PPACB */ + case TX79_MMI0_PEXT5: /* TODO: TX79_MMI0_PEXT5 */ + case TX79_MMI0_PPAC5: /* TODO: TX79_MMI0_PPAC5 */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI0 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI0"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI1(ctx->opcode); + + switch (opc) { + case TX79_MMI1_PABSW: /* TODO: TX79_MMI1_PABSW */ + case TX79_MMI1_PCEQW: /* TODO: TX79_MMI1_PCEQW */ + case TX79_MMI1_PMINW: /* TODO: TX79_MMI1_PMINW */ + case TX79_MMI1_PADSBH: /* TODO: TX79_MMI1_PADSBH */ + case TX79_MMI1_PABSH: /* TODO: TX79_MMI1_PABSH */ + case TX79_MMI1_PCEQH: /* TODO: TX79_MMI1_PCEQH */ + case TX79_MMI1_PMINH: /* TODO: TX79_MMI1_PMINH */ + case TX79_MMI1_PCEQB: /* TODO: TX79_MMI1_PCEQB */ + case TX79_MMI1_PADDUW: /* TODO: TX79_MMI1_PADDUW */ + case TX79_MMI1_PSUBUW: /* TODO: TX79_MMI1_PSUBUW */ + case TX79_MMI1_PEXTUW: /* TODO: TX79_MMI1_PEXTUW */ + case TX79_MMI1_PADDUH: /* TODO: TX79_MMI1_PADDUH */ + case TX79_MMI1_PSUBUH: /* TODO: TX79_MMI1_PSUBUH */ + case TX79_MMI1_PEXTUH: /* TODO: TX79_MMI1_PEXTUH */ + case TX79_MMI1_PADDUB: /* TODO: TX79_MMI1_PADDUB */ + case TX79_MMI1_PSUBUB: /* TODO: TX79_MMI1_PSUBUB */ + case TX79_MMI1_PEXTUB: /* TODO: TX79_MMI1_PEXTUB */ + case TX79_MMI1_QFSRV: /* TODO: TX79_MMI1_QFSRV */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI1 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI1"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI2(ctx->opcode); + + switch (opc) { + case TX79_MMI2_PMADDW: /* TODO: TX79_MMI2_PMADDW */ + case TX79_MMI2_PSLLVW: /* TODO: TX79_MMI2_PSLLVW */ + case TX79_MMI2_PSRLVW: /* TODO: TX79_MMI2_PSRLVW */ + case TX79_MMI2_PMSUBW: /* 
TODO: TX79_MMI2_PMSUBW */ + case TX79_MMI2_PMFHI: /* TODO: TX79_MMI2_PMFHI */ + case TX79_MMI2_PMFLO: /* TODO: TX79_MMI2_PMFLO */ + case TX79_MMI2_PINTH: /* TODO: TX79_MMI2_PINTH */ + case TX79_MMI2_PMULTW: /* TODO: TX79_MMI2_PMULTW */ + case TX79_MMI2_PDIVW: /* TODO: TX79_MMI2_PDIVW */ + case TX79_MMI2_PCPYLD: /* TODO: TX79_MMI2_PCPYLD */ + case TX79_MMI2_PMADDH: /* TODO: TX79_MMI2_PMADDH */ + case TX79_MMI2_PHMADH: /* TODO: TX79_MMI2_PHMADH */ + case TX79_MMI2_PAND: /* TODO: TX79_MMI2_PAND */ + case TX79_MMI2_PXOR: /* TODO: TX79_MMI2_PXOR */ + case TX79_MMI2_PMSUBH: /* TODO: TX79_MMI2_PMSUBH */ + case TX79_MMI2_PHMSBH: /* TODO: TX79_MMI2_PHMSBH */ + case TX79_MMI2_PEXEH: /* TODO: TX79_MMI2_PEXEH */ + case TX79_MMI2_PREVH: /* TODO: TX79_MMI2_PREVH */ + case TX79_MMI2_PMULTH: /* TODO: TX79_MMI2_PMULTH */ + case TX79_MMI2_PDIVBW: /* TODO: TX79_MMI2_PDIVBW */ + case TX79_MMI2_PEXEW: /* TODO: TX79_MMI2_PEXEW */ + case TX79_MMI2_PROT3W: /* TODO: TX79_MMI2_PROT3W */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI2 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI2"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI3(ctx->opcode); + + switch (opc) { + case TX79_MMI3_PMADDUW: /* TODO: TX79_MMI3_PMADDUW */ + case TX79_MMI3_PSRAVW: /* TODO: TX79_MMI3_PSRAVW */ + case TX79_MMI3_PMTHI: /* TODO: TX79_MMI3_PMTHI */ + case TX79_MMI3_PMTLO: /* TODO: TX79_MMI3_PMTLO */ + case TX79_MMI3_PINTEH: /* TODO: TX79_MMI3_PINTEH */ + case TX79_MMI3_PMULTUW: /* TODO: TX79_MMI3_PMULTUW */ + case TX79_MMI3_PDIVUW: /* TODO: TX79_MMI3_PDIVUW */ + case TX79_MMI3_PCPYUD: /* TODO: TX79_MMI3_PCPYUD */ + case TX79_MMI3_POR: /* TODO: TX79_MMI3_POR */ + case TX79_MMI3_PNOR: /* TODO: TX79_MMI3_PNOR */ + case TX79_MMI3_PEXCH: /* TODO: TX79_MMI3_PEXCH */ + case TX79_MMI3_PCPYH: /* TODO: TX79_MMI3_PCPYH */ + case TX79_MMI3_PEXCW: /* TODO: TX79_MMI3_PEXCW */ + generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI3 */ + break; + default: + MIPS_INVAL("TX79 MMI class MMI3"); + generate_exception_end(ctx, EXCP_RI); + break; + } +} + +static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx) +{ + uint32_t opc = MASK_TX79_MMI(ctx->opcode); + int rs = extract32(ctx->opcode, 21, 5); + int rt = extract32(ctx->opcode, 16, 5); + int rd = extract32(ctx->opcode, 11, 5); + + switch (opc) { + case TX79_MMI_CLASS_MMI0: + decode_tx79_mmi0(env, ctx); + break; + case TX79_MMI_CLASS_MMI1: + decode_tx79_mmi1(env, ctx); + break; + case TX79_MMI_CLASS_MMI2: + decode_tx79_mmi2(env, ctx); + break; + case TX79_MMI_CLASS_MMI3: + decode_tx79_mmi3(env, ctx); + break; + case TX79_MMI_MULT1: + case TX79_MMI_MULTU1: + gen_mul_txx9(ctx, opc, rd, rs, rt); + break; + case TX79_MMI_DIV1: + case TX79_MMI_DIVU1: + gen_muldiv(ctx, opc, 1, rs, rt); + break; + case TX79_MMI_MTLO1: + case TX79_MMI_MTHI1: + gen_HILO(ctx, opc, 1, rs); + break; + case TX79_MMI_MFLO1: + case TX79_MMI_MFHI1: + gen_HILO(ctx, opc, 1, rd); + break; + case TX79_MMI_MADD: /* TODO: TX79_MMI_MADD */ + case TX79_MMI_MADDU: /* TODO: TX79_MMI_MADDU */ + case TX79_MMI_PLZCW: /* TODO: TX79_MMI_PLZCW */ + case TX79_MMI_MADD1: /* TODO: TX79_MMI_MADD1 */ + case TX79_MMI_MADDU1: /* TODO: TX79_MMI_MADDU1 */ + case TX79_MMI_PMFHL: /* TODO: TX79_MMI_PMFHL */ + case TX79_MMI_PMTHL: /* TODO: TX79_MMI_PMTHL */ + case TX79_MMI_PSLLH: /* TODO: TX79_MMI_PSLLH */ + case TX79_MMI_PSRLH: /* TODO: TX79_MMI_PSRLH */ + case TX79_MMI_PSRAH: /* TODO: TX79_MMI_PSRAH */ + 
+    case TX79_MMI_PSLLW:    /* TODO: TX79_MMI_PSLLW */
+    case TX79_MMI_PSRLW:    /* TODO: TX79_MMI_PSRLW */
+    case TX79_MMI_PSRAW:    /* TODO: TX79_MMI_PSRAW */
+        generate_exception_end(ctx, EXCP_RI);    /* TODO: TX79_CLASS_MMI */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_lq(CPUMIPSState *env, DisasContext *ctx)
+{
+    generate_exception_end(ctx, EXCP_RI);    /* TODO: TX79_LQ */
+}
+
+static void gen_tx79_sq(DisasContext *ctx, int base, int rt, int offset)
+{
+    generate_exception_end(ctx, EXCP_RI);    /* TODO: TX79_SQ */
+}
+
+/*
+ * The TX79-specific instruction Store Quadword
+ *
+ * +--------+-------+-------+------------------------+
+ * | 011111 | base  | rt    | offset                 |    SQ
+ * +--------+-------+-------+------------------------+
+ *      6       5       5             16
+ *
+ * has the same opcode as the Read Hardware Register instruction
+ *
+ * +--------+-------+-------+-------+-------+--------+
+ * | 011111 | 00000 | rt    | rd    | 00000 | 111011 |    RDHWR
+ * +--------+-------+-------+-------+-------+--------+
+ *      6       5       5       5       5        6
+ *
+ * that is required, trapped and emulated by the Linux kernel. However, all
+ * RDHWR encodings yield address error exceptions on the TX79 since the SQ
+ * offset is odd. Therefore all valid SQ instructions can execute normally.
+ * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
+ * between SQ and RDHWR, as the Linux kernel does.
+ */
+static void decode_tx79_sq(CPUMIPSState *env, DisasContext *ctx)
+{
+    int base = extract32(ctx->opcode, 21, 5);
+    int rt = extract32(ctx->opcode, 16, 5);
+    int offset = extract32(ctx->opcode, 0, 16);
+
+#ifdef CONFIG_USER_ONLY
+    uint32_t op1 = MASK_SPECIAL3(ctx->opcode);
+    uint32_t op2 = extract32(ctx->opcode, 6, 5);
+
+    if (base == 0 && op2 == 0 && op1 == OPC_RDHWR) {
+        int rd = extract32(ctx->opcode, 11, 5);
+
+        gen_rdhwr(ctx, rt, rd, 0);
+        return;
+    }
+#endif
+
+    gen_tx79_sq(ctx, base, rt, offset);
+}
+
 static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
 {
     int rs, rt, rd, sa;
@@ -23380,7 +24859,9 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
         op2 = MASK_BSHFL(ctx->opcode);
         switch (op2) {
         case OPC_ALIGN:
-        case OPC_ALIGN_END:
+        case OPC_ALIGN_1:
+        case OPC_ALIGN_2:
+        case OPC_ALIGN_3:
         case OPC_BITSWAP:
             check_insn(ctx, ISA_MIPS32R6);
             decode_opc_special3_r6(env, ctx);
@@ -23406,7 +24887,13 @@ static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
         op2 = MASK_DBSHFL(ctx->opcode);
         switch (op2) {
         case OPC_DALIGN:
-        case OPC_DALIGN_END:
+        case OPC_DALIGN_1:
+        case OPC_DALIGN_2:
+        case OPC_DALIGN_3:
+        case OPC_DALIGN_4:
+        case OPC_DALIGN_5:
+        case OPC_DALIGN_6:
+        case OPC_DALIGN_7:
         case OPC_DBITSWAP:
             check_insn(ctx, ISA_MIPS32R6);
             decode_opc_special3_r6(env, ctx);
@@ -24605,10 +26092,18 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
         decode_opc_special(env, ctx);
         break;
     case OPC_SPECIAL2:
-        decode_opc_special2_legacy(env, ctx);
+        if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) {
+            decode_tx79_mmi(env, ctx);
+        } else {
+            decode_opc_special2_legacy(env, ctx);
+        }
         break;
     case OPC_SPECIAL3:
-        decode_opc_special3(env, ctx);
+        if (ctx->insn_flags & INSN_R5900) {
+            decode_tx79_sq(env, ctx);    /* TX79_SQ */
+        } else {
+            decode_opc_special3(env, ctx);
+        }
         break;
     case OPC_REGIMM:
         op1 = MASK_REGIMM(ctx->opcode);
@@ -24895,6 +26390,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
         break;
     case OPC_LL: /* Load and stores */
         check_insn(ctx, ISA_MIPS2);
+
check_insn_opc_user_only(ctx, INSN_R5900); /* Fallthrough */ case OPC_LWL: case OPC_LWR: @@ -24920,6 +26416,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) case OPC_SC: check_insn(ctx, ISA_MIPS2); check_insn_opc_removed(ctx, ISA_MIPS32R6); + check_insn_opc_user_only(ctx, INSN_R5900); gen_st_cond(ctx, op, rt, rs, imm); break; case OPC_CACHE: @@ -24933,7 +26430,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) break; case OPC_PREF: check_insn_opc_removed(ctx, ISA_MIPS32R6); - check_insn(ctx, ISA_MIPS4 | ISA_MIPS32); + check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 | + INSN_R5900); /* Treat as NOP. */ break; @@ -25185,9 +26683,11 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) #if defined(TARGET_MIPS64) /* MIPS64 opcodes */ + case OPC_LLD: + check_insn_opc_user_only(ctx, INSN_R5900); + /* fall through */ case OPC_LDL: case OPC_LDR: - case OPC_LLD: check_insn_opc_removed(ctx, ISA_MIPS32R6); /* fall through */ case OPC_LWU: @@ -25208,6 +26708,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) case OPC_SCD: check_insn_opc_removed(ctx, ISA_MIPS32R6); check_insn(ctx, ISA_MIPS3); + check_insn_opc_user_only(ctx, INSN_R5900); check_mips_64(ctx); gen_st_cond(ctx, op, rt, rs, imm); break; @@ -25262,8 +26763,12 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_MSA: /* OPC_MDMX */ - /* MDMX: Not implemented. */ - gen_msa(env, ctx); + if (ctx->insn_flags & INSN_R5900) { + decode_tx79_lq(env, ctx); /* TX79_LQ */ + } else { + /* MDMX: Not implemented. */ + gen_msa(env, ctx); + } break; case OPC_PCREL: check_insn(ctx, ISA_MIPS32R6); @@ -25285,6 +26790,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->saved_pc = -1; ctx->insn_flags = env->insn_flags; ctx->CP0_Config1 = env->CP0_Config1; + ctx->CP0_Config2 = env->CP0_Config2; ctx->CP0_Config3 = env->CP0_Config3; ctx->CP0_Config5 = env->CP0_Config5; ctx->btarget = 0; @@ -25799,6 +27305,24 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Status |= (1 << CP0St_FR); } + if (env->insn_flags & ISA_MIPS32R6) { + /* PTW = 1 */ + env->CP0_PWSize = 0x40; + /* GDI = 12 */ + /* UDI = 12 */ + /* MDI = 12 */ + /* PRI = 12 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x0C30C302; + } else { + /* GDI = 0 */ + /* UDI = 0 */ + /* MDI = 0 */ + /* PRI = 0 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x02; + } + if (env->CP0_Config3 & (1 << CP0C3_ISA) & (1 << (CP0C3_ISA + 1))) { /* microMIPS on reset when Config3.ISA is 3 */ env->hflags |= MIPS_HFLAG_M16; diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c index b3320b9dc7..85da4a269c 100644 --- a/target/mips/translate_init.inc.c +++ b/target/mips/translate_init.inc.c @@ -320,7 +320,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, { @@ -411,6 +411,65 @@ const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { + /* + * The Toshiba TX System RISC TX79 Core Architecture manual + * + * https://wiki.qemu.org/File:C790.pdf + * + * describes the C790 processor that is a follow-up to the R5900. + * There are a few notable differences in that the R5900 FPU + * + * - is not IEEE 754-1985 compliant, + * - does not implement double format, and + * - its machine code is nonstandard. 
+ */ + .name = "R5900", + .CP0_PRid = 0x00002E00, + /* No L2 cache, icache size 32k, dcache size 32k, uncached coherency. */ + .CP0_Config0 = (0x3 << 9) | (0x3 << 6) | (0x2 << CP0C0_K0), + .CP0_Status_rw_bitmask = 0xF4C79C1F, +#ifdef CONFIG_USER_ONLY + /* + * R5900 hardware traps to the Linux kernel for IEEE 754-1985 and LL/SC + * emulation. For user only, QEMU is the kernel, so we emulate the traps + * by simply emulating the instructions directly. + * + * Note: Config1 is only used internally, the R5900 has only Config0. + */ + .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU), + .CP0_LLAddr_rw_bitmask = 0xFFFFFFFF, + .CP0_LLAddr_shift = 4, + .CP1_fcr0 = (0x38 << FCR0_PRID) | (0x0 << FCR0_REV), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0x0183FFFF, +#else + /* + * The R5900 COP1 FPU implements single-precision floating-point + * operations but is not entirely IEEE 754-1985 compatible. In + * particular, + * + * - NaN (not a number) and +/- infinities are not supported; + * - exception mechanisms are not fully supported; + * - denormalized numbers are not supported; + * - rounding towards nearest and +/- infinities are not supported; + * - computed results usually differs in the least significant bit; + * - saturations can differ more than the least significant bit. + * + * Since only rounding towards zero is supported, the two least + * significant bits of FCR31 are hardwired to 01. + * + * FPU emulation is disabled here until it is implemented. + * + * Note: Config1 is only used internally, the R5900 has only Config0. + */ + .CP0_Config1 = (47 << CP0C1_MMU), +#endif /* !CONFIG_USER_ONLY */ + .SEGBITS = 32, + .PABITS = 32, + .insn_flags = CPU_R5900 | ASE_MMI, + .mmu_type = MMU_TYPE_R4000, + }, + { /* A generic CPU supporting MIPS32 Release 6 ISA. FIXME: Support IEEE 754-2008 FP. Eventually this should be replaced by a real CPU model. */ @@ -485,7 +544,8 @@ const mips_def_t mips_defs[] = .CP1_fcr31 = (1 << FCR31_ABS2008) | (1 << FCR31_NAN2008), .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSPR2 | ASE_MT, + .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSP_R2 | ASE_DSP_R3 | + ASE_MT, .mmu_type = MMU_TYPE_R4000, }, #if defined(TARGET_MIPS64) @@ -761,7 +821,7 @@ const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { - /* A generic CPU providing MIPS64 ASE DSP 2 features. + /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. 
*/ .name = "mips64dspr2", .CP0_PRid = 0x00010000, @@ -786,7 +846,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 42, .PABITS = 36, - .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ef64248bc4..7a1481fd0b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 8f0d86d104..a1485fad9b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "tcg.h" #include "internal.h" +#include "qemu/atomic128.h" //#define DEBUG_OP @@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, return i; } -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); } void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); } @@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; @@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. 
*/ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 881743571b..4e59dd5f42 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -33,6 +33,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" #define CPU_SINGLE_STEP 0x1 @@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + tcg_temp_free(EA); + return; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; - tcg_temp_free(EA); - return; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); tcg_gen_mov_tl(cpu_reserve, EA); @@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); -#ifdef CONFIG_ATOMIC128 - if (ctx->le_mode) { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + if (HAVE_CMPXCHG128) { + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); + if (ctx->le_mode) { + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } else { + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif tcg_temp_free(EA); - tcg_temp_free_i32(oi); } else { TCGLabel *lab_fail = gen_new_label(); TCGLabel *lab_over = gen_new_label(); diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 263e63cb03..ee9432eb15 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8381,8 +8381,8 @@ static void getset_compat_deprecated(Object *obj, Visitor *v, const char *name, QNull *null = NULL; if (!qtest_enabled()) { - error_report("CPU 'compat' property is deprecated and has no effect; " - "use max-cpu-compat machine property instead"); + warn_report("CPU 'compat' property is deprecated and has no effect; " + "use max-cpu-compat machine property instead"); } visit_type_null(v, name, &null, NULL); qobject_unref(null); diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index bacae4f503..490c43e6e6 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" @@ -1379,65 +1380,62 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -static void do_cdsg(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3, bool parallel) +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) { uintptr_t ra = GETPC(); Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 oldv; + uint64_t oldh, oldl; bool fail; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); -#endif - } else { - uint64_t oldh, oldl; + check_alignment(env, addr, 16, ra); - check_alignment(env, addr, 16, ra); + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; } + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + env->cc_op = fail; env->regs[r1] = int128_gethi(oldv); env->regs[r1 + 1] = int128_getlo(oldv); } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - do_cdsg(env, addr, r1, r3, false); -} - void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, uint32_t r1, uint32_t r3) { - do_cdsg(env, addr, r1, r3, true); + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + 
env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); } static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) uint32_t mem_idx = cpu_mmu_index(env, false); -#endif uintptr_t ra = GETPC(); uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t sc = extract32(env->regs[0], 8, 8); @@ -1465,18 +1463,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, probe_write(env, a2, 0, mem_idx, ra); #endif - /* Note that the compare-and-swap is atomic, and the store is atomic, but - the complete operation is not. Therefore we do not need to assert serial - context in order to implement this. That said, restart early if we can't - support either operation that is supposed to be atomic. */ + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ if (parallel) { - int mask = 0; -#if !defined(CONFIG_ATOMIC64) - mask = -8; -#elif !defined(CONFIG_ATOMIC128) - mask = -16; + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; #endif - if (((4 << fc) | (1 << sc)) & mask) { + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } @@ -1546,16 +1546,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 ov; - if (parallel) { -#ifdef CONFIG_ATOMIC128 - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); - cc = !int128_eq(ov, cv); -#else - /* Note that we asserted !parallel above. */ - g_assert_not_reached(); -#endif - } else { + if (!parallel) { uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); @@ -1567,6 +1558,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); } env->regs[r3 + 0] = int128_gethi(ov); @@ -1596,18 +1594,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a2, svh, ra); break; case 4: - if (parallel) { -#ifdef CONFIG_ATOMIC128 + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); Int128 sv = int128_make128(svl, svh); helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); -#else + } else { /* Note that we asserted !parallel above. 
*/ g_assert_not_reached(); -#endif - } else { - cpu_stq_data_ra(env, a2 + 0, svh, ra); - cpu_stq_data_ra(env, a2 + 8, svl, ra); } break; default: @@ -2100,76 +2096,64 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) #endif /* load pair from quadword */ -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) { uintptr_t ra = GETPC(); uint64_t hi, lo; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); - hi = int128_gethi(v); - lo = int128_getlo(v); -#endif - } else { - check_alignment(env, addr, 16, ra); - - hi = cpu_ldq_data_ra(env, addr + 0, ra); - lo = cpu_ldq_data_ra(env, addr + 8, ra); - } + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); env->retxl = lo; return hi; } -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) -{ - return do_lpq(env, addr, false); -} - uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) { - return do_lpq(env, addr, true); -} - -/* store pair to quadword */ -static void do_stpq(CPUS390XState *env, uint64_t addr, - uint64_t low, uint64_t high, bool parallel) -{ uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + assert(HAVE_ATOMIC128); - Int128 v = int128_make128(low, high); - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); -#endif - } else { - check_alignment(env, addr, 16, ra); + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); - cpu_stq_data_ra(env, addr + 0, high, ra); - cpu_stq_data_ra(env, addr + 8, low, ra); - } + env->retxl = lo; + return hi; } +/* store pair to quadword */ void HELPER(stpq)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, false); + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); } void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, true); + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); } /* Execute instruction. This instruction executes an insn modified with diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 18861cd186..b5bd56b7ee 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -44,6 +44,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" /* Information that (most) every instruction needs to manipulate. */ @@ -1128,11 +1129,19 @@ struct DisasInsn { const char *name; + /* Pre-process arguments before HELP_OP. 
*/ void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. */ DisasJumpType (*help_op)(DisasContext *, DisasOps *); uint64_t data; @@ -2032,6 +2041,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) int r3 = get_field(s->fields, r3); int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); + DisasJumpType ret = DISAS_NEXT; TCGv_i64 addr; TCGv_i32 t_r1, t_r3; @@ -2039,17 +2049,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) addr = get_address(s, 0, b2, d2); t_r1 = tcg_const_i32(r1); t_r3 = tcg_const_i32(r3); - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); } else { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; } tcg_temp_free_i64(addr); tcg_temp_free_i32(t_r1); tcg_temp_free_i32(t_r3); set_cc_static(s); - return DISAS_NEXT; + return ret; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -3026,10 +3039,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { gen_helper_lpq_parallel(o->out, cpu_env, o->in2); } else { - gen_helper_lpq(o->out, cpu_env, o->in2); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return_low128(o->out2); return DISAS_NEXT; @@ -4406,10 +4422,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); } else { - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return DISAS_NEXT; } @@ -6125,11 +6144,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (insn->help_op) { ret = insn->help_op(s, &o); } - if (insn->help_wout) { - insn->help_wout(s, &f, &o); - } - if (insn->help_cout) { - insn->help_cout(s, &o); + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &f, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } } /* Free any temporaries created by the helpers. */ diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index 68f978d80b..2b49d1ca40 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -116,8 +116,6 @@ static void uc32_cpu_initfn(Object *obj) env->uncached_asr = ASR_MODE_PRIV; env->regs[31] = 0x03000000; #endif - - tlb_flush(cs); } static const VMStateDescription vmstate_uc32_cpu = { |
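
Taken together, the PowerPC and s390x hunks above replace compile-time "#ifdef CONFIG_ATOMIC128" guards with run-time checks of the constants HAVE_ATOMIC128 and HAVE_CMPXCHG128 from qemu/atomic128.h: the translator tests the constant and either emits a call to the *_parallel helper or falls back to gen_helper_exit_atomic(), and the helper itself only asserts the capability. The stand-alone sketch below illustrates the underlying idiom only; it is not QEMU code, and HAVE_ATOMIC128, do_fast_path() and do_fallback() here are illustrative stand-ins, not names from the patch apart from the capability macro.

    #include <assert.h>
    #include <stdio.h>

    /* Stand-in for a capability constant such as HAVE_ATOMIC128 (always 0 or 1). */
    #define HAVE_ATOMIC128 1

    /* Analogous to a *_parallel helper: only reachable when the capability is
     * present, so it merely asserts it, as the patched helpers do. */
    static void do_fast_path(void)
    {
        assert(HAVE_ATOMIC128);
        printf("16-byte atomic fast path\n");
    }

    /* Analogous to gen_helper_exit_atomic(): give up and redo the operation
     * serially (in QEMU, by restarting the TB under the exclusive lock). */
    static void do_fallback(void)
    {
        printf("fall back to serial execution\n");
    }

    int main(void)
    {
        /*
         * With #ifdef, the unused branch is never even parsed.  With a
         * constant if (), both branches are compiled and type-checked on
         * every host, and the compiler still eliminates the dead one.
         */
        if (HAVE_ATOMIC128) {
            do_fast_path();
        } else {
            do_fallback();
        }
        return 0;
    }

The same trade-off (always compile both paths, decide at run time on a constant) is what motivates the constant checks in gen_lq(), gen_std(), gen_lqarx(), gen_stqcx_(), op_cdsg(), op_lpq() and op_stpq() in the hunks above.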