diff options
Diffstat (limited to 'target/arm/vec_internal.h')
-rw-r--r-- | target/arm/vec_internal.h | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h index 1d63402042..1f4ed80ff7 100644 --- a/target/arm/vec_internal.h +++ b/target/arm/vec_internal.h @@ -50,8 +50,21 @@ #define H8(x) (x) #define H1_8(x) (x) -/* Data for expanding active predicate bits to bytes, for byte elements. */ +/* + * Expand active predicate bits to bytes, for byte elements. + */ extern const uint64_t expand_pred_b_data[256]; +static inline uint64_t expand_pred_b(uint8_t byte) +{ + return expand_pred_b_data[byte]; +} + +/* Similarly for half-word elements. */ +extern const uint64_t expand_pred_h_data[0x55 + 1]; +static inline uint64_t expand_pred_h(uint8_t byte) +{ + return expand_pred_h_data[byte & 0x55]; +} static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) { @@ -217,4 +230,17 @@ uint64_t pmull_h(uint64_t op1, uint64_t op2); */ uint64_t pmull_w(uint64_t op1, uint64_t op2); +/** + * bfdotadd: + * @sum: addend + * @e1, @e2: multiplicand vectors + * + * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. + * The @e1 and @e2 operands correspond to the 32-bit source vector + * slots and contain two BFloat16 values each. + * + * Corresponds to the ARM pseudocode function BFDotAdd. + */ +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2); + #endif /* TARGET_ARM_VEC_INTERNAL_H */ |