author    Richard Henderson  2021-05-25 03:02:28 +0200
committer Peter Maydell      2021-05-25 17:01:43 +0200
commit    5dad1ba52f39f1c0bbf76c20b0b9f30636b51c88 (patch)
tree      810d7500bac8ae1a68cba3ef5e11debf05febcd5 /target/arm/vec_helper.c
parent    target/arm: Add ID_AA64ZFR0 fields and isar_feature_aa64_sve2 (diff)
target/arm: Implement SVE2 Integer Multiply - Unpredicated
For MUL, we can rely on generic support.  For SMULH and UMULH,
create some trivial helpers.  For PMUL, back in a21bb78e5817,
we organized helper_gvec_pmul_b in preparation for this use.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
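As a rough illustration of what the new helpers compute (not part of the patch itself): a highpart multiply widens each element to twice its width, multiplies, and keeps only the upper half of the product. A minimal standalone sketch for the signed 16-bit element case, assuming ordinary C integer promotion; the function name is purely illustrative:

    #include <stdint.h>

    /* Illustrative only: the high 16 bits of a signed 16x16 -> 32-bit product,
     * i.e. the per-element operation of SVE2 SMULH at 16-bit element size. */
    static int16_t smulh16_example(int16_t n, int16_t m)
    {
        return (int16_t)(((int32_t)n * m) >> 16);
    }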
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r--  target/arm/vec_helper.c  |  96
1 file changed, 96 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 3fbeae87cb..40b92100bf 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1985,3 +1985,99 @@ void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
clear_tail(vd, oprsz, simd_maxsz(desc));
}
#endif
+
+/*
+ * NxN -> N highpart multiply
+ *
+ * TODO: expose this as a generic vector operation.
+ */
+
+void HELPER(gvec_smulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        d[i] = ((int32_t)n[i] * m[i]) >> 8;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        d[i] = ((int32_t)n[i] * m[i]) >> 16;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int32_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = ((int64_t)n[i] * m[i]) >> 32;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t discard;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        muls64(&discard, &d[i], n[i], m[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        d[i] = ((uint32_t)n[i] * m[i]) >> 8;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        d[i] = ((uint32_t)n[i] * m[i]) >> 16;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = ((uint64_t)n[i] * m[i]) >> 32;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t discard;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        mulu64(&discard, &d[i], n[i], m[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
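For the 64-bit element size there is no wider standard integer type to widen into, so the helpers above call muls64()/mulu64() from QEMU's host-utils and discard the low half of the 128-bit product. A standalone sketch of the equivalent computation, assuming a compiler that provides the non-standard __int128 type; the function name is illustrative only:

    #include <stdint.h>

    /* Illustrative only: the high 64 bits of an unsigned 64x64 -> 128-bit
     * product; mulu64(&discard, &hi, n, m) above leaves the same value in hi. */
    static uint64_t umulh64_example(uint64_t n, uint64_t m)
    {
        return (uint64_t)(((unsigned __int128)n * m) >> 64);
    }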