summaryrefslogtreecommitdiffstats
path: root/target/arm/neon_helper.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/neon_helper.c')
-rw-r--r--target/arm/neon_helper.c519
1 files changed, 89 insertions, 430 deletions
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index b637265691..338b9189d5 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -11,6 +11,7 @@
#include "cpu.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
+#include "vec_internal.h"
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
@@ -576,496 +577,154 @@ NEON_POP(pmax_s16, neon_s16, 2)
NEON_POP(pmax_u16, neon_u16, 2)
#undef NEON_FN
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8 || \
- tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, NULL))
NEON_VOP(shl_u16, neon_u16, 2)
#undef NEON_FN
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (sizeof(src1) * 8 - 1); \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, NULL))
NEON_VOP(shl_s16, neon_s16, 2)
#undef NEON_FN
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if ((tmp >= (ssize_t)sizeof(src1) * 8) \
- || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, NULL))
NEON_VOP(rshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, NULL))
NEON_VOP(rshl_s16, neon_s16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift)
{
- int32_t dest;
- int32_t val = (int32_t)valop;
- int8_t shift = (int8_t)shiftop;
- if ((shift >= 32) || (shift <= -32)) {
- dest = 0;
- } else if (shift < 0) {
- int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- }
- return dest;
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- int64_t val = valop;
- if ((shift >= 64) || (shift <= -64)) {
- val = 0;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == INT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x4000000000000000LL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- val <<= shift;
- }
- return val;
+uint64_t HELPER(neon_rshl_s64)(uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, true, NULL);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8 || \
- tmp < -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (-tmp - 1); \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, NULL))
NEON_VOP(rshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, NULL))
NEON_VOP(rshl_u16, neon_u16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift)
{
- uint32_t dest;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32 || shift < -32) {
- dest = 0;
- } else if (shift == -32) {
- dest = val >> 31;
- } else if (shift < 0) {
- uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- }
- return dest;
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- if (shift >= 64 || shift < -64) {
- val = 0;
- } else if (shift == -64) {
- /* Rounding a 1-bit result just preserves that bit. */
- val >>= 63;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == UINT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x8000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- val <<= shift;
- }
- return val;
+uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, true, NULL);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qshl_u32, neon_u32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64) {
- if (val) {
- val = ~(uint64_t)0;
- SET_QC();
- }
- } else if (shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- uint64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = ~(uint64_t)0;
- }
- }
- return val;
+uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
+{
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } else { \
- dest = src1; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> 31; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } \
- }} while (0)
+uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
+}
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qshl_s32, neon_s32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- int64_t val = valop;
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = (val >> 63) ^ ~SIGNBIT64;
- }
- } else if (shift <= -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- int64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = (tmp >> 63) ^ ~SIGNBIT64;
- }
- }
- return val;
+uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
+{
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \
- SET_QC(); \
- dest = 0; \
- } else { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- } \
- }} while (0)
-NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
-NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
+uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
+}
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
+NEON_VOP_ENV(qshlu_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
+NEON_VOP_ENV(qshlu_s16, neon_s16, 2)
#undef NEON_FN
-uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- if ((int32_t)valop < 0) {
- SET_QC();
- return 0;
- }
- return helper_neon_qshl_u32(env, valop, shiftop);
+ return do_suqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
+uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
{
- if ((int64_t)valop < 0) {
- SET_QC();
- return 0;
- }
- return helper_neon_qshl_u64(env, valop, shiftop);
+ return do_suqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (sizeof(src1) * 8 - 1); \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop)
+uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- uint32_t dest;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32) {
- if (val) {
- SET_QC();
- dest = ~0;
- } else {
- dest = 0;
- }
- } else if (shift < -32) {
- dest = 0;
- } else if (shift == -32) {
- dest = val >> 31;
- } else if (shift < 0) {
- uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- if ((dest >> shift) != val) {
- SET_QC();
- dest = ~0;
- }
- }
- return dest;
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = ~0;
- }
- } else if (shift < -64) {
- val = 0;
- } else if (shift == -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == UINT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x8000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else { \
- uint64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = ~0;
- }
- }
- return val;
+uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, true, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = (typeof(dest))(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- int32_t dest;
- int32_t val = (int32_t)valop;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32) {
- if (val) {
- SET_QC();
- dest = (val >> 31) ^ ~SIGNBIT;
- } else {
- dest = 0;
- }
- } else if (shift <= -32) {
- dest = 0;
- } else if (shift < 0) {
- int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- if ((dest >> shift) != val) {
- SET_QC();
- dest = (val >> 31) ^ ~SIGNBIT;
- }
- }
- return dest;
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- int64_t val = valop;
-
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = (val >> 63) ^ ~SIGNBIT64;
- }
- } else if (shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == INT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x4000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- int64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = (tmp >> 63) ^ ~SIGNBIT64;
- }
- }
- return val;
+uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, true, env->vfp.qc);
}
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)