summaryrefslogtreecommitdiffstats
path: root/target/arm/vec_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/vec_internal.h')
-rw-r--r--target/arm/vec_internal.h28
1 files changed, 27 insertions, 1 deletions
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
index 1d63402042..1f4ed80ff7 100644
--- a/target/arm/vec_internal.h
+++ b/target/arm/vec_internal.h
@@ -50,8 +50,21 @@
#define H8(x) (x)
#define H1_8(x) (x)
-/* Data for expanding active predicate bits to bytes, for byte elements. */
+/*
+ * Expand active predicate bits to bytes, for byte elements.
+ */
extern const uint64_t expand_pred_b_data[256];
+static inline uint64_t expand_pred_b(uint8_t byte)
+{
+ return expand_pred_b_data[byte];
+}
+
+/* Similarly for half-word elements. */
+extern const uint64_t expand_pred_h_data[0x55 + 1];
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+ return expand_pred_h_data[byte & 0x55];
+}
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
@@ -217,4 +230,17 @@ uint64_t pmull_h(uint64_t op1, uint64_t op2);
*/
uint64_t pmull_w(uint64_t op1, uint64_t op2);
+/**
+ * bfdotadd:
+ * @sum: addend
+ * @e1, @e2: multiplicand vectors
+ *
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
+ * slots and contain two Bfloat16 values each.
+ *
+ * Corresponds to the ARM pseudocode function BFDotAdd.
+ */
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
+
#endif /* TARGET_ARM_VEC_INTERNAL_H */