summaryrefslogtreecommitdiffstats
path: root/target
diff options
context:
space:
mode:
authorPeter Maydell2019-02-28 18:35:42 +0100
committerPeter Maydell2019-02-28 18:35:42 +0100
commit9403bccfe3e271f04e12c8c64d306e0cff589009 (patch)
tree060b865d955534528520291bc8bb51701a818385 /target
parentMerge remote-tracking branch 'remotes/kraxel/tags/audio-20190228-pull-request... (diff)
parentlinux-user: Enable HWCAP_ASIMDFHM, HWCAP_JSCVT (diff)
downloadqemu-9403bccfe3e271f04e12c8c64d306e0cff589009.tar.gz
qemu-9403bccfe3e271f04e12c8c64d306e0cff589009.tar.xz
qemu-9403bccfe3e271f04e12c8c64d306e0cff589009.zip
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190228-1' into staging
target-arm queue: * add MHU and dual-core support to Musca boards * refactor some VFP insns to be gated by ID registers * Revert "arm: Allow system registers for KVM guests to be changed by QEMU code" * Implement ARMv8.2-FHM extension * Advertise JSCVT via HWCAP for linux-user # gpg: Signature made Thu 28 Feb 2019 11:06:55 GMT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20190228-1: linux-user: Enable HWCAP_ASIMDFHM, HWCAP_JSCVT target/arm: Enable ARMv8.2-FHM for -cpu max target/arm: Implement VFMAL and VFMSL for aarch32 target/arm: Implement FMLAL and FMLSL for aarch64 target/arm: Add helpers for FMLAL Revert "arm: Allow system registers for KVM guests to be changed by QEMU code" target/arm: Gate "miscellaneous FP" insns by ID register field target/arm: Use MVFR1 feature bits to gate A32/T32 FP16 instructions hw/arm/armsse: Unify init-svtor and cpuwait handling hw/arm/iotkit-sysctl: Implement CPUWAIT and INITSVTOR* hw/arm/iotkit-sysctl: Add SSE-200 registers hw/misc/iotkit-sysctl: Correct typo in INITSVTOR0 register name target/arm/arm-powerctl: Add new arm_set_cpu_on_and_reset() target/arm/cpu: Allow init-svtor property to be set after realize hw/arm/armsse: Wire up the MHUs hw/misc/armsse-mhu.c: Model the SSE-200 Message Handling Unit Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r--target/arm/arm-powerctl.c56
-rw-r--r--target/arm/arm-powerctl.h16
-rw-r--r--target/arm/cpu.c32
-rw-r--r--target/arm/cpu.h76
-rw-r--r--target/arm/cpu64.c2
-rw-r--r--target/arm/helper.c27
-rw-r--r--target/arm/helper.h9
-rw-r--r--target/arm/kvm32.c23
-rw-r--r--target/arm/kvm64.c2
-rw-r--r--target/arm/machine.c2
-rw-r--r--target/arm/translate-a64.c49
-rw-r--r--target/arm/translate.c180
-rw-r--r--target/arm/vec_helper.c148
13 files changed, 519 insertions, 103 deletions
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index f9de5164e5..f77a950db6 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -228,6 +228,62 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
+static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state,
+ run_on_cpu_data data)
+{
+ ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
+
+ /* Initialize the cpu we are turning on */
+ cpu_reset(target_cpu_state);
+ target_cpu_state->halted = 0;
+
+ /* Finally set the power status */
+ assert(qemu_mutex_iothread_locked());
+ target_cpu->power_state = PSCI_ON;
+}
+
+int arm_set_cpu_on_and_reset(uint64_t cpuid)
+{
+ CPUState *target_cpu_state;
+ ARMCPU *target_cpu;
+
+ assert(qemu_mutex_iothread_locked());
+
+ /* Retrieve the cpu we are powering up */
+ target_cpu_state = arm_get_cpu_by_id(cpuid);
+ if (!target_cpu_state) {
+ /* The cpu was not found */
+ return QEMU_ARM_POWERCTL_INVALID_PARAM;
+ }
+
+ target_cpu = ARM_CPU(target_cpu_state);
+ if (target_cpu->power_state == PSCI_ON) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: CPU %" PRId64 " is already on\n",
+ __func__, cpuid);
+ return QEMU_ARM_POWERCTL_ALREADY_ON;
+ }
+
+ /*
+ * If another CPU has powered the target on we are in the state
+ * ON_PENDING and additional attempts to power on the CPU should
+ * fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI
+ * spec)
+ */
+ if (target_cpu->power_state == PSCI_ON_PENDING) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: CPU %" PRId64 " is already powering on\n",
+ __func__, cpuid);
+ return QEMU_ARM_POWERCTL_ON_PENDING;
+ }
+
+ async_run_on_cpu(target_cpu_state, arm_set_cpu_on_and_reset_async_work,
+ RUN_ON_CPU_NULL);
+
+ /* We are good to go */
+ return QEMU_ARM_POWERCTL_RET_SUCCESS;
+}
+
static void arm_set_cpu_off_async_work(CPUState *target_cpu_state,
run_on_cpu_data data)
{
diff --git a/target/arm/arm-powerctl.h b/target/arm/arm-powerctl.h
index 04353923c0..37c8a04f0a 100644
--- a/target/arm/arm-powerctl.h
+++ b/target/arm/arm-powerctl.h
@@ -74,4 +74,20 @@ int arm_set_cpu_off(uint64_t cpuid);
*/
int arm_reset_cpu(uint64_t cpuid);
+/*
+ * arm_set_cpu_on_and_reset:
+ * @cpuid: the id of the CPU we want to star
+ *
+ * Start the cpu designated by @cpuid and put it through its normal
+ * CPU reset process. The CPU will start in the way it is architected
+ * to start after a power-on reset.
+ *
+ * Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success.
+ * QEMU_ARM_POWERCTL_INVALID_PARAM if there is no CPU with that ID.
+ * QEMU_ARM_POWERCTL_ALREADY_ON if the CPU is already on.
+ * QEMU_ARM_POWERCTL_ON_PENDING if the CPU is already partway through
+ * powering on.
+ */
+int arm_set_cpu_on_and_reset(uint64_t cpuid);
+
#endif
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 8ea6569088..54b61f917b 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -22,6 +22,7 @@
#include "target/arm/idau.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "qapi/visitor.h"
#include "cpu.h"
#include "internals.h"
#include "qemu-common.h"
@@ -771,9 +772,21 @@ static Property arm_cpu_pmsav7_dregion_property =
pmsav7_dregion,
qdev_prop_uint32, uint32_t);
-/* M profile: initial value of the Secure VTOR */
-static Property arm_cpu_initsvtor_property =
- DEFINE_PROP_UINT32("init-svtor", ARMCPU, init_svtor, 0);
+static void arm_get_init_svtor(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ visit_type_uint32(v, name, &cpu->init_svtor, errp);
+}
+
+static void arm_set_init_svtor(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ visit_type_uint32(v, name, &cpu->init_svtor, errp);
+}
void arm_cpu_post_init(Object *obj)
{
@@ -845,8 +858,14 @@ void arm_cpu_post_init(Object *obj)
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_STRONG,
&error_abort);
- qdev_property_add_static(DEVICE(obj), &arm_cpu_initsvtor_property,
- &error_abort);
+ /*
+ * M profile: initial value of the Secure VTOR. We can't just use
+ * a simple DEFINE_PROP_UINT32 for this because we want to permit
+ * the property to be set after realize.
+ */
+ object_property_add(obj, "init-svtor", "uint32",
+ arm_get_init_svtor, arm_set_init_svtor,
+ NULL, NULL, &error_abort);
}
qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property,
@@ -995,7 +1014,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (arm_feature(env, ARM_FEATURE_VFP4)) {
set_feature(env, ARM_FEATURE_VFP3);
- set_feature(env, ARM_FEATURE_VFP_FP16);
}
if (arm_feature(env, ARM_FEATURE_VFP3)) {
set_feature(env, ARM_FEATURE_VFP);
@@ -1656,7 +1674,6 @@ static void cortex_a9_initfn(Object *obj)
cpu->dtb_compatible = "arm,cortex-a9";
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_VFP3);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_EL3);
@@ -2003,6 +2020,7 @@ static void arm_max_initfn(Object *obj)
t = cpu->isar.id_isar6;
t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1);
t = FIELD_DP32(t, ID_ISAR6, DP, 1);
+ t = FIELD_DP32(t, ID_ISAR6, FHM, 1);
cpu->isar.id_isar6 = t;
t = cpu->id_mmfr4;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1eea1a408b..36cd365efa 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1730,6 +1730,27 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4)
FIELD(ID_DFR0, PERFMON, 24, 4)
FIELD(ID_DFR0, TRACEFILT, 28, 4)
+FIELD(MVFR0, SIMDREG, 0, 4)
+FIELD(MVFR0, FPSP, 4, 4)
+FIELD(MVFR0, FPDP, 8, 4)
+FIELD(MVFR0, FPTRAP, 12, 4)
+FIELD(MVFR0, FPDIVIDE, 16, 4)
+FIELD(MVFR0, FPSQRT, 20, 4)
+FIELD(MVFR0, FPSHVEC, 24, 4)
+FIELD(MVFR0, FPROUND, 28, 4)
+
+FIELD(MVFR1, FPFTZ, 0, 4)
+FIELD(MVFR1, FPDNAN, 4, 4)
+FIELD(MVFR1, SIMDLS, 8, 4)
+FIELD(MVFR1, SIMDINT, 12, 4)
+FIELD(MVFR1, SIMDSP, 16, 4)
+FIELD(MVFR1, SIMDHP, 20, 4)
+FIELD(MVFR1, FPHP, 24, 4)
+FIELD(MVFR1, SIMDFMAC, 28, 4)
+
+FIELD(MVFR2, SIMDMISC, 0, 4)
+FIELD(MVFR2, FPMISC, 4, 4)
+
QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
/* If adding a feature bit which corresponds to a Linux ELF
@@ -1747,7 +1768,6 @@ enum arm_features {
ARM_FEATURE_THUMB2,
ARM_FEATURE_PMSA, /* no MMU; may have Memory Protection Unit */
ARM_FEATURE_VFP3,
- ARM_FEATURE_VFP_FP16,
ARM_FEATURE_NEON,
ARM_FEATURE_M, /* Microcontroller profile. */
ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */
@@ -2538,25 +2558,18 @@ bool write_list_to_cpustate(ARMCPU *cpu);
/**
* write_cpustate_to_list:
* @cpu: ARMCPU
- * @kvm_sync: true if this is for syncing back to KVM
*
* For each register listed in the ARMCPU cpreg_indexes list, write
* its value from the ARMCPUState structure into the cpreg_values list.
* This is used to copy info from TCG's working data structures into
* KVM or for outbound migration.
*
- * @kvm_sync is true if we are doing this in order to sync the
- * register state back to KVM. In this case we will only update
- * values in the list if the previous list->cpustate sync actually
- * successfully wrote the CPU state. Otherwise we will keep the value
- * that is in the list.
- *
* Returns: true if all register values were read correctly,
* false if some register was unknown or could not be read.
* Note that we do not stop early on failure -- we will attempt
* reading all registers in the list.
*/
-bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
+bool write_cpustate_to_list(ARMCPU *cpu);
#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
@@ -3283,6 +3296,11 @@ static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
}
+static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0;
+}
+
static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
{
/*
@@ -3294,6 +3312,41 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
}
/*
+ * We always set the FP and SIMD FP16 fields to indicate identical
+ * levels of support (assuming SIMD is implemented at all), so
+ * we only need one set of accessors.
+ */
+static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0;
+}
+
+static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1;
+}
+
+static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1;
+}
+
+static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2;
+}
+
+static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3;
+}
+
+static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4;
+}
+
+/*
* 64-bit feature tests via id registers.
*/
static inline bool isar_feature_aa64_aes(const ARMISARegisters *id)
@@ -3356,6 +3409,11 @@ static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
}
+static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0;
+}
+
static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 69e4134f79..1b0c427277 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -308,6 +308,7 @@ static void aarch64_max_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1);
t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1);
t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1);
+ t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1);
cpu->isar.id_aa64isar0 = t;
t = cpu->isar.id_aa64isar1;
@@ -347,6 +348,7 @@ static void aarch64_max_initfn(Object *obj)
u = cpu->isar.id_isar6;
u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1);
u = FIELD_DP32(u, ID_ISAR6, DP, 1);
+ u = FIELD_DP32(u, ID_ISAR6, FHM, 1);
cpu->isar.id_isar6 = u;
/*
diff --git a/target/arm/helper.c b/target/arm/helper.c
index fbaa801cea..1fa282a7fc 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -265,7 +265,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
return true;
}
-bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
+bool write_cpustate_to_list(ARMCPU *cpu)
{
/* Write the coprocessor state from cpu->env to the (index,value) list. */
int i;
@@ -274,7 +274,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
for (i = 0; i < cpu->cpreg_array_len; i++) {
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
const ARMCPRegInfo *ri;
- uint64_t newval;
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
if (!ri) {
@@ -284,29 +283,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
if (ri->type & ARM_CP_NO_RAW) {
continue;
}
-
- newval = read_raw_cp_reg(&cpu->env, ri);
- if (kvm_sync) {
- /*
- * Only sync if the previous list->cpustate sync succeeded.
- * Rather than tracking the success/failure state for every
- * item in the list, we just recheck "does the raw write we must
- * have made in write_list_to_cpustate() read back OK" here.
- */
- uint64_t oldval = cpu->cpreg_values[i];
-
- if (oldval == newval) {
- continue;
- }
-
- write_raw_cp_reg(&cpu->env, ri, oldval);
- if (read_raw_cp_reg(&cpu->env, ri) != oldval) {
- continue;
- }
-
- write_raw_cp_reg(&cpu->env, ri, newval);
- }
- cpu->cpreg_values[i] = newval;
+ cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
}
return ok;
}
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 747cb64d29..d363904278 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -677,6 +677,15 @@ DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
index a75e04cc8f..50327989dc 100644
--- a/target/arm/kvm32.c
+++ b/target/arm/kvm32.c
@@ -125,9 +125,6 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
if (extract32(id_pfr0, 12, 4) == 1) {
set_feature(&features, ARM_FEATURE_THUMB2EE);
}
- if (extract32(ahcf->isar.mvfr1, 20, 4) == 1) {
- set_feature(&features, ARM_FEATURE_VFP_FP16);
- }
if (extract32(ahcf->isar.mvfr1, 12, 4) == 1) {
set_feature(&features, ARM_FEATURE_NEON);
}
@@ -387,8 +384,24 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}
- write_cpustate_to_list(cpu, true);
-
+ /* Note that we do not call write_cpustate_to_list()
+ * here, so we are only writing the tuple list back to
+ * KVM. This is safe because nothing can change the
+ * CPUARMState cp15 fields (in particular gdb accesses cannot)
+ * and so there are no changes to sync. In fact syncing would
+ * be wrong at this point: for a constant register where TCG and
+ * KVM disagree about its value, the preceding write_list_to_cpustate()
+ * would not have had any effect on the CPUARMState value (since the
+ * register is read-only), and a write_cpustate_to_list() here would
+ * then try to write the TCG value back into KVM -- this would either
+ * fail or incorrectly change the value the guest sees.
+ *
+ * If we ever want to allow the user to modify cp15 registers via
+ * the gdb stub, we would need to be more clever here (for instance
+ * tracking the set of registers kvm_arch_get_registers() successfully
+ * managed to update the CPUARMState with, and only allowing those
+ * to be written back up into the kernel).
+ */
if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e3ba149248..089af9c5f0 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -838,8 +838,6 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}
- write_cpustate_to_list(cpu, true);
-
if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 124192bfc2..b292549614 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque)
abort();
}
} else {
- if (!write_cpustate_to_list(cpu, false)) {
+ if (!write_cpustate_to_list(cpu)) {
/* This should never fail. */
abort();
}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c56e878787..d3c8eaf089 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10917,9 +10917,29 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
return;
+
+ case 0x1d: /* FMLAL */
+ case 0x3d: /* FMLSL */
+ case 0x59: /* FMLAL2 */
+ case 0x79: /* FMLSL2 */
+ if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (fp_access_check(s)) {
+ int is_s = extract32(insn, 23, 1);
+ int is_2 = extract32(insn, 29, 1);
+ int data = (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), cpu_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_a64);
+ }
+ return;
+
default:
unallocated_encoding(s);
return;
@@ -12739,6 +12759,17 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
is_fp = 2;
break;
+ case 0x00: /* FMLAL */
+ case 0x04: /* FMLSL */
+ case 0x18: /* FMLAL2 */
+ case 0x1c: /* FMLSL2 */
+ if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = MO_16;
+ /* is_fp, but we pass cpu_env not fp_status. */
+ break;
default:
unallocated_encoding(s);
return;
@@ -12849,6 +12880,22 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
tcg_temp_free_ptr(fpst);
}
return;
+
+ case 0x00: /* FMLAL */
+ case 0x04: /* FMLSL */
+ case 0x18: /* FMLAL2 */
+ case 0x1c: /* FMLSL2 */
+ {
+ int is_s = extract32(opcode, 2, 1);
+ int is_2 = u;
+ int data = (index << 2) | (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), cpu_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_idx_a64);
+ }
+ return;
}
if (size == 3) {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c1175798ac..8f7f5b95aa 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3357,14 +3357,10 @@ static const uint8_t fp_decode_rm[] = {
FPROUNDING_NEGINF,
};
-static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
+static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
-
if (dp) {
VFP_DREG_D(rd, insn);
VFP_DREG_N(rn, insn);
@@ -3375,15 +3371,18 @@ static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
rm = VFP_SREG_M(insn);
}
- if ((insn & 0x0f800e50) == 0x0e000a00) {
+ if ((insn & 0x0f800e50) == 0x0e000a00 && dc_isar_feature(aa32_vsel, s)) {
return handle_vsel(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
+ } else if ((insn & 0x0fb00e10) == 0x0e800a00 &&
+ dc_isar_feature(aa32_vminmaxnm, s)) {
return handle_vminmaxnm(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
+ } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
+ dc_isar_feature(aa32_vrint, s)) {
/* VRINTA, VRINTN, VRINTP, VRINTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vrint(insn, rd, rm, dp, rounding);
- } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
+ } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
+ dc_isar_feature(aa32_vcvt_dr, s)) {
/* VCVTA, VCVTN, VCVTP, VCVTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vcvt(insn, rd, rm, dp, rounding);
@@ -3427,10 +3426,12 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
}
if (extract32(insn, 28, 4) == 0xf) {
- /* Encodings with T=1 (Thumb) or unconditional (ARM):
- * only used in v8 and above.
+ /*
+ * Encodings with T=1 (Thumb) or unconditional (ARM):
+ * only used for the "miscellaneous VFP features" added in v8A
+ * and v7M (and gated on the MVFR2.FPMisc field).
*/
- return disas_vfp_v8_insn(s, insn);
+ return disas_vfp_misc_insn(s, insn);
}
dp = ((insn & 0xf00) == 0xb00);
@@ -3663,17 +3664,27 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
* UNPREDICTABLE if bit 8 is set prior to ARMv8
* (we choose to UNDEF)
*/
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
- return 1;
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
}
rm_is_dp = false;
break;
case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
- return 1;
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
}
rd_is_dp = false;
break;
@@ -7876,7 +7887,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rm & 1)) {
return 1;
}
@@ -7908,7 +7919,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rd & 1)) {
return 1;
}
@@ -8372,15 +8383,9 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz;
int data = 0;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rd | rn | rm) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
if ((insn & 0xfe200f10) == 0xfc200800) {
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
@@ -8407,10 +8412,39 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
+ } else if ((insn & 0xff300f10) == 0xfc200810) {
+ /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 23, 1);
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
+ is_long = true;
+ data = is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
+ ptr_is_env = true;
} else {
return 1;
}
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+ VFP_DREG_M(rm, insn);
+ if ((rn | rm) & q & !is_long) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ rm = VFP_SREG_M(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
+
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
@@ -8422,16 +8456,19 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
@@ -8450,14 +8487,9 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
gen_helper_gvec_3 *fn_gvec = NULL;
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz, data;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- if ((rd | rn) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
if ((insn & 0xff000f10) == 0xfe000800) {
/* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
@@ -8486,6 +8518,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
} else if ((insn & 0xffb00f00) == 0xfe200d00) {
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
int u = extract32(insn, 4, 1);
+
if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
@@ -8493,10 +8526,48 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
/* rm is just Vm, and index is M. */
data = extract32(insn, 5, 1); /* index */
rm = extract32(insn, 0, 4);
+ } else if ((insn & 0xffa00f10) == 0xfe000810) {
+ /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 20, 1);
+ int vm20 = extract32(insn, 0, 3);
+ int vm3 = extract32(insn, 3, 1);
+ int m = extract32(insn, 5, 1);
+ int index;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
+ if (q) {
+ rm = vm20;
+ index = m * 2 + vm3;
+ } else {
+ rm = vm20 * 2 + m;
+ index = vm3;
+ }
+ is_long = true;
+ data = (index << 2) | is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
+ ptr_is_env = true;
} else {
return 1;
}
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+ if (rn & q & !is_long) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
@@ -8508,16 +8579,19 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index dfc635cf9a..dedef62403 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -898,3 +898,151 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
}
clear_tail(d, oprsz, simd_maxsz(desc));
}
+
+/*
+ * Convert float16 to float32, raising no exceptions and
+ * preserving exceptional values, including SNaN.
+ * This is effectively an unpack+repack operation.
+ */
+static float32 float16_to_float32_by_bits(uint32_t f16, bool fz16)
+{
+ const int f16_bias = 15;
+ const int f32_bias = 127;
+ uint32_t sign = extract32(f16, 15, 1);
+ uint32_t exp = extract32(f16, 10, 5);
+ uint32_t frac = extract32(f16, 0, 10);
+
+ if (exp == 0x1f) {
+ /* Inf or NaN */
+ exp = 0xff;
+ } else if (exp == 0) {
+ /* Zero or denormal. */
+ if (frac != 0) {
+ if (fz16) {
+ frac = 0;
+ } else {
+ /*
+ * Denormal; these are all normal float32.
+ * Shift the fraction so that the msb is at bit 11,
+ * then remove bit 11 as the implicit bit of the
+ * normalized float32. Note that we still go through
+ * the shift for normal numbers below, to put the
+ * float32 fraction at the right place.
+ */
+ int shift = clz32(frac) - 21;
+ frac = (frac << shift) & 0x3ff;
+ exp = f32_bias - f16_bias - shift + 1;
+ }
+ }
+ } else {
+ /* Normal number; adjust the bias. */
+ exp += f32_bias - f16_bias;
+ }
+ sign <<= 31;
+ exp <<= 23;
+ frac <<= 23 - 10;
+
+ return sign | exp | frac;
+}
+
+static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
+{
+ /*
+ * Branchless load of u32[0], u64[0], u32[1], or u64[1].
+ * Load the 2nd qword iff is_q & is_2.
+ * Shift to the 2nd dword iff !is_q & is_2.
+ * For !is_q & !is_2, the upper bits of the result are garbage.
+ */
+ return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5);
+}
+
+/*
+ * Note that FMLAL requires oprsz == 8 or oprsz == 16,
+ * as there is not yet SVE versions that might use blocking.
+ */
+
+static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
+ uint32_t desc, bool fz16)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ int is_q = oprsz == 16;
+ uint64_t n_4, m_4;
+
+ /* Pre-load all of the f16 data, avoiding overlap issues. */
+ n_4 = load4_f16(vn, is_q, is_2);
+ m_4 = load4_f16(vm, is_q, is_2);
+
+ /* Negate all inputs for FMLSL at once. */
+ if (is_s) {
+ n_4 ^= 0x8000800080008000ull;
+ }
+
+ for (i = 0; i < oprsz / 4; i++) {
+ float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
+ float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
+ uint32_t desc, bool fz16)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
+ int is_q = oprsz == 16;
+ uint64_t n_4;
+ float32 m_1;
+
+ /* Pre-load all of the f16 data, avoiding overlap issues. */
+ n_4 = load4_f16(vn, is_q, is_2);
+
+ /* Negate all inputs for FMLSL at once. */
+ if (is_s) {
+ n_4 ^= 0x8000800080008000ull;
+ }
+
+ m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
+
+ for (i = 0; i < oprsz / 4; i++) {
+ float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}