summaryrefslogtreecommitdiffstats
path: root/target/arm/sve_helper.c
diff options
context:
space:
mode:
authorPeter Maydell2021-03-14 14:18:49 +0100
committerPeter Maydell2021-03-14 14:18:49 +0100
commit6f8a81fc296535f73c48cf9563862e088cc71c57 (patch)
tree1132db2423ed7ce1fcbec9ce6c32dcf07ead6dc7 /target/arm/sve_helper.c
parentMerge remote-tracking branch 'remotes/vivier/tags/m68k-for-6.0-pull-request' ... (diff)
parenthw/display/pxa2xx: Inline template header (diff)
downloadqemu-6f8a81fc296535f73c48cf9563862e088cc71c57.tar.gz
qemu-6f8a81fc296535f73c48cf9563862e088cc71c57.tar.xz
qemu-6f8a81fc296535f73c48cf9563862e088cc71c57.zip
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210314' into staging
target-arm queue: * versal: Support XRAMs and XRAM controller * smmu: Various minor bug fixes * SVE emulation: fix bugs handling odd vector lengths * allwinner-sun8i-emac: traverse transmit queue using TX_CUR_DESC register value * tests/acceptance: fix orangepi-pc acceptance tests * hw/timer/sse-timer: Propagate eventual error in sse_timer_realize() * hw/arm/virt: KVM: The IPA lower bound is 32 * npcm7xx: support MFT module * pl110, pxa2xx_lcd: tidy up template headers # gpg: Signature made Sun 14 Mar 2021 13:17:43 GMT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20210314: (39 commits) hw/display/pxa2xx: Inline template header hw/display/pxa2xx: Apply whitespace-only coding style fixes to template header hw/display/pxa2xx: Apply brace-related coding style fixes to template header hw/display/pxa2xx: Remove use of BITS in pxa2xx_template.h hw/display/pxa2xx_lcd: Remove dest_width state field hw/display/pxa2xx_lcd: Remove dead code for non-32-bpp surfaces hw/display/pl110: Remove use of BITS from pl110_template.h hw/display/pl110: Pull included-once parts of template header into pl110.c hw/display/pl110: Remove dead code for non-32-bpp surfaces tests/qtest: Test PWM fan RPM using MFT in PWM test hw/arm: Connect PWM fans in NPCM7XX boards hw/arm: Add MFT device to NPCM7xx Soc hw/misc: Add NPCM7XX MFT Module hw/misc: Add GPIOs for duty in NPCM7xx PWM hw/arm/virt: KVM: The IPA lower bound is 32 accel: kvm: Fix kvm_type invocation hw/timer/sse-timer: Propagate eventual error in sse_timer_realize() tests/acceptance: drop ARMBIAN_ARTIFACTS_CACHED condition for orangepi-pc, cubieboard tests tests/acceptance: update sunxi kernel from armbian to 5.10.16 tests/acceptance/boot_linux_console: change URL for test_arm_orangepi_bionic_20_08 ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/sve_helper.c')
-rw-r--r--target/arm/sve_helper.c107
1 files changed, 60 insertions, 47 deletions
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 844db08bd5..fd6c58f96a 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1871,6 +1871,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA);
+ int esize = 1 << esz;
uint64_t *d = vd;
intptr_t i;
@@ -1883,33 +1884,35 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
mm = extract64(mm, high * half, half);
nn = expand_bits(nn, esz);
mm = expand_bits(mm, esz);
- d[0] = nn + (mm << (1 << esz));
+ d[0] = nn | (mm << esize);
} else {
- ARMPredicateReg tmp_n, tmp_m;
+ ARMPredicateReg tmp;
/* We produce output faster than we consume input.
Therefore we must be mindful of possible overlap. */
- if ((vn - vd) < (uintptr_t)oprsz) {
- vn = memcpy(&tmp_n, vn, oprsz);
- }
- if ((vm - vd) < (uintptr_t)oprsz) {
- vm = memcpy(&tmp_m, vm, oprsz);
+ if (vd == vn) {
+ vn = memcpy(&tmp, vn, oprsz);
+ if (vd == vm) {
+ vm = vn;
+ }
+ } else if (vd == vm) {
+ vm = memcpy(&tmp, vm, oprsz);
}
if (high) {
high = oprsz >> 1;
}
- if ((high & 3) == 0) {
+ if ((oprsz & 7) == 0) {
uint32_t *n = vn, *m = vm;
high >>= 2;
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[H4(high + i)];
uint64_t mm = m[H4(high + i)];
nn = expand_bits(nn, esz);
mm = expand_bits(mm, esz);
- d[i] = nn + (mm << (1 << esz));
+ d[i] = nn | (mm << esize);
}
} else {
uint8_t *n = vn, *m = vm;
@@ -1921,7 +1924,7 @@ void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
nn = expand_bits(nn, esz);
mm = expand_bits(mm, esz);
- d16[H2(i)] = nn + (mm << (1 << esz));
+ d16[H2(i)] = nn | (mm << esize);
}
}
}
@@ -1939,7 +1942,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
if (oprsz <= 8) {
l = compress_bits(n[0] >> odd, esz);
h = compress_bits(m[0] >> odd, esz);
- d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
+ d[0] = l | (h << (4 * oprsz));
} else {
ARMPredicateReg tmp_m;
intptr_t oprsz_16 = oprsz / 16;
@@ -1953,23 +1956,35 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
h = n[2 * i + 1];
l = compress_bits(l >> odd, esz);
h = compress_bits(h >> odd, esz);
- d[i] = l + (h << 32);
+ d[i] = l | (h << 32);
}
- /* For VL which is not a power of 2, the results from M do not
- align nicely with the uint64_t for D. Put the aligned results
- from M into TMP_M and then copy it into place afterward. */
+ /*
+ * For VL which is not a multiple of 512, the results from M do not
+ * align nicely with the uint64_t for D. Put the aligned results
+ * from M into TMP_M and then copy it into place afterward.
+ */
if (oprsz & 15) {
- d[i] = compress_bits(n[2 * i] >> odd, esz);
+ int final_shift = (oprsz & 15) * 2;
+
+ l = n[2 * i + 0];
+ h = n[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[i] = l | (h << final_shift);
for (i = 0; i < oprsz_16; i++) {
l = m[2 * i + 0];
h = m[2 * i + 1];
l = compress_bits(l >> odd, esz);
h = compress_bits(h >> odd, esz);
- tmp_m.p[i] = l + (h << 32);
+ tmp_m.p[i] = l | (h << 32);
}
- tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ tmp_m.p[i] = l | (h << final_shift);
swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
} else {
@@ -1978,7 +1993,7 @@ void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
h = m[2 * i + 1];
l = compress_bits(l >> odd, esz);
h = compress_bits(h >> odd, esz);
- d[oprsz_16 + i] = l + (h << 32);
+ d[oprsz_16 + i] = l | (h << 32);
}
}
}
@@ -2090,11 +2105,11 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
high = oprsz >> 1;
}
- if ((high & 3) == 0) {
+ if ((oprsz & 7) == 0) {
uint32_t *n = vn;
high >>= 2;
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[H4(high + i)];
d[i] = expand_bits(nn, 0);
}
@@ -2222,10 +2237,10 @@ void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
*/
int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
- return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
+ return last_active_element(vg, words, esz);
}
void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
@@ -2695,7 +2710,7 @@ static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (last_active_pred(vn, vg, oprsz)) {
compute_brk_z(vd, vm, vg, oprsz, true);
} else {
@@ -2706,7 +2721,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (last_active_pred(vn, vg, oprsz)) {
return compute_brks_z(vd, vm, vg, oprsz, true);
} else {
@@ -2717,7 +2732,7 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (last_active_pred(vn, vg, oprsz)) {
compute_brk_z(vd, vm, vg, oprsz, false);
} else {
@@ -2728,7 +2743,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (last_active_pred(vn, vg, oprsz)) {
return compute_brks_z(vd, vm, vg, oprsz, false);
} else {
@@ -2738,56 +2753,55 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
compute_brk_z(vd, vn, vg, oprsz, true);
}
uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
return compute_brks_z(vd, vn, vg, oprsz, true);
}
void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
compute_brk_z(vd, vn, vg, oprsz, false);
}
uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
return compute_brks_z(vd, vn, vg, oprsz, false);
}
void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
compute_brk_m(vd, vn, vg, oprsz, true);
}
uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
return compute_brks_m(vd, vn, vg, oprsz, true);
}
void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
compute_brk_m(vd, vn, vg, oprsz, false);
}
uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
return compute_brks_m(vd, vn, vg, oprsz, false);
}
void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (!last_active_pred(vn, vg, oprsz)) {
do_zero(vd, oprsz);
}
@@ -2812,8 +2826,7 @@ static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
if (last_active_pred(vn, vg, oprsz)) {
return predtest_ones(vd, oprsz, -1);
} else {
@@ -2823,12 +2836,12 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
intptr_t i;
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ for (i = 0; i < words; ++i) {
uint64_t t = n[i] & g[i] & mask;
sum += ctpop64(t);
}
@@ -2837,8 +2850,8 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
{
- uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
uint64_t esz_mask = pred_esz_masks[esz];
ARMPredicateReg *d = vd;
uint32_t flags;
@@ -2883,7 +2896,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
} \
uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
{ \
- uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \
+ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
for (i = 0; i < oprsz; ) { \
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \