summaryrefslogtreecommitdiffstats
path: root/lib/raid6
diff options
context:
space:
mode:
Diffstat (limited to 'lib/raid6')
-rw-r--r--lib/raid6/Makefile7
-rw-r--r--lib/raid6/algos.c81
-rw-r--r--lib/raid6/neon.uc5
-rw-r--r--lib/raid6/recov_neon_inner.c19
-rw-r--r--lib/raid6/test/Makefile3
5 files changed, 59 insertions, 56 deletions
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 7ed43eaa02ef..e723eacf7868 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -13,8 +13,7 @@ raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
hostprogs-y += mktables
quiet_cmd_unroll = UNROLL $@
- cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
- < $< > $@ || ( rm -f $@ && exit 1 )
+ cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) < $< > $@
ifeq ($(CONFIG_ALTIVEC),y)
altivec_flags := -maltivec $(call cc-option,-mabi=altivec)
@@ -40,7 +39,7 @@ endif
ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
NEON_FLAGS := -ffreestanding
ifeq ($(ARCH),arm)
-NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
+NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
endif
CFLAGS_recov_neon_inner.o += $(NEON_FLAGS)
ifeq ($(ARCH),arm64)
@@ -160,7 +159,7 @@ $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
quiet_cmd_mktable = TABLE $@
- cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
+ cmd_mktable = $(obj)/mktables > $@
targets += tables.c
$(obj)/tables.c: $(obj)/mktables FORCE
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 5065b1e7e327..7e4f7a8ffa8e 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -34,64 +34,64 @@ struct raid6_calls raid6_call;
EXPORT_SYMBOL_GPL(raid6_call);
const struct raid6_calls * const raid6_algos[] = {
-#if defined(__ia64__)
- &raid6_intx16,
- &raid6_intx32,
-#endif
#if defined(__i386__) && !defined(__arch_um__)
- &raid6_mmxx1,
- &raid6_mmxx2,
- &raid6_sse1x1,
- &raid6_sse1x2,
- &raid6_sse2x1,
- &raid6_sse2x2,
-#ifdef CONFIG_AS_AVX2
- &raid6_avx2x1,
- &raid6_avx2x2,
-#endif
#ifdef CONFIG_AS_AVX512
- &raid6_avx512x1,
&raid6_avx512x2,
+ &raid6_avx512x1,
#endif
-#endif
-#if defined(__x86_64__) && !defined(__arch_um__)
- &raid6_sse2x1,
- &raid6_sse2x2,
- &raid6_sse2x4,
#ifdef CONFIG_AS_AVX2
- &raid6_avx2x1,
&raid6_avx2x2,
- &raid6_avx2x4,
+ &raid6_avx2x1,
+#endif
+ &raid6_sse2x2,
+ &raid6_sse2x1,
+ &raid6_sse1x2,
+ &raid6_sse1x1,
+ &raid6_mmxx2,
+ &raid6_mmxx1,
#endif
+#if defined(__x86_64__) && !defined(__arch_um__)
#ifdef CONFIG_AS_AVX512
- &raid6_avx512x1,
- &raid6_avx512x2,
&raid6_avx512x4,
+ &raid6_avx512x2,
+ &raid6_avx512x1,
#endif
+#ifdef CONFIG_AS_AVX2
+ &raid6_avx2x4,
+ &raid6_avx2x2,
+ &raid6_avx2x1,
+#endif
+ &raid6_sse2x4,
+ &raid6_sse2x2,
+ &raid6_sse2x1,
#endif
#ifdef CONFIG_ALTIVEC
- &raid6_altivec1,
- &raid6_altivec2,
- &raid6_altivec4,
- &raid6_altivec8,
- &raid6_vpermxor1,
- &raid6_vpermxor2,
- &raid6_vpermxor4,
&raid6_vpermxor8,
+ &raid6_vpermxor4,
+ &raid6_vpermxor2,
+ &raid6_vpermxor1,
+ &raid6_altivec8,
+ &raid6_altivec4,
+ &raid6_altivec2,
+ &raid6_altivec1,
#endif
#if defined(CONFIG_S390)
&raid6_s390vx8,
#endif
- &raid6_intx1,
- &raid6_intx2,
- &raid6_intx4,
- &raid6_intx8,
#ifdef CONFIG_KERNEL_MODE_NEON
- &raid6_neonx1,
- &raid6_neonx2,
- &raid6_neonx4,
&raid6_neonx8,
+ &raid6_neonx4,
+ &raid6_neonx2,
+ &raid6_neonx1,
#endif
+#if defined(__ia64__)
+ &raid6_intx32,
+ &raid6_intx16,
+#endif
+ &raid6_intx8,
+ &raid6_intx4,
+ &raid6_intx2,
+ &raid6_intx1,
NULL
};
@@ -163,6 +163,11 @@ static inline const struct raid6_calls *raid6_choose_gen(
if ((*algo)->valid && !(*algo)->valid())
continue;
+ if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
+ best = *algo;
+ break;
+ }
+
perf = 0;
preempt_disable();
diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
index d5242f544551..b7c68030da4f 100644
--- a/lib/raid6/neon.uc
+++ b/lib/raid6/neon.uc
@@ -28,7 +28,6 @@
typedef uint8x16_t unative_t;
-#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
#define NSIZE sizeof(unative_t)
/*
@@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
int d, z, z0;
register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
- const unative_t x1d = NBYTES(0x1d);
+ const unative_t x1d = vdupq_n_u8(0x1d);
z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
@@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
int d, z, z0;
register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
- const unative_t x1d = NBYTES(0x1d);
+ const unative_t x1d = vdupq_n_u8(0x1d);
z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c
index 8cd20c9f834a..f13c07f82297 100644
--- a/lib/raid6/recov_neon_inner.c
+++ b/lib/raid6/recov_neon_inner.c
@@ -10,11 +10,6 @@
#include <arm_neon.h>
-static const uint8x16_t x0f = {
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-};
-
#ifdef CONFIG_ARM
/*
* AArch32 does not provide this intrinsic natively because it does not
@@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8x16_t pm1 = vld1q_u8(pbmul + 16);
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
+ uint8x16_t x0f = vdupq_n_u8(0x0f);
/*
* while ( bytes-- ) {
@@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+ vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
- vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(qm1, vy);
qx = veorq_u8(vx, vy);
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4);
+ vy = vshrq_n_u8(px, 4);
vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
- vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(pm1, vy);
vx = veorq_u8(vx, vy);
db = veorq_u8(vx, qx);
@@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
{
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
+ uint8x16_t x0f = vdupq_n_u8(0x0f);
/*
* while (bytes--) {
@@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+ vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
- vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(qm1, vy);
vx = veorq_u8(vx, vy);
vy = veorq_u8(vx, vld1q_u8(p));
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 79777645cac9..3ab8720aa2f8 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -34,6 +34,9 @@ endif
ifeq ($(IS_X86),yes)
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
+ CFLAGS += $(shell echo "pshufb %xmm0, %xmm0" | \
+ gcc -c -x assembler - >&/dev/null && \
+ rm ./-.o && echo -DCONFIG_AS_SSSE3=1)
CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
gcc -c -x assembler - >&/dev/null && \
rm ./-.o && echo -DCONFIG_AS_AVX2=1)