diff options
Diffstat (limited to 'lib/raid6')
-rw-r--r-- | lib/raid6/Makefile | 7 | ||||
-rw-r--r-- | lib/raid6/algos.c | 81 | ||||
-rw-r--r-- | lib/raid6/neon.uc | 5 | ||||
-rw-r--r-- | lib/raid6/recov_neon_inner.c | 19 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 3 |
5 files changed, 59 insertions, 56 deletions
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 7ed43eaa02ef..e723eacf7868 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -13,8 +13,7 @@ raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o hostprogs-y += mktables quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ - < $< > $@ || ( rm -f $@ && exit 1 ) + cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) < $< > $@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec $(call cc-option,-mabi=altivec) @@ -40,7 +39,7 @@ endif ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -ffreestanding ifeq ($(ARCH),arm) -NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon endif CFLAGS_recov_neon_inner.o += $(NEON_FLAGS) ifeq ($(ARCH),arm64) @@ -160,7 +159,7 @@ $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) quiet_cmd_mktable = TABLE $@ - cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + cmd_mktable = $(obj)/mktables > $@ targets += tables.c $(obj)/tables.c: $(obj)/mktables FORCE diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 5065b1e7e327..7e4f7a8ffa8e 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -34,64 +34,64 @@ struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); const struct raid6_calls * const raid6_algos[] = { -#if defined(__ia64__) - &raid6_intx16, - &raid6_intx32, -#endif #if defined(__i386__) && !defined(__arch_um__) - &raid6_mmxx1, - &raid6_mmxx2, - &raid6_sse1x1, - &raid6_sse1x2, - &raid6_sse2x1, - &raid6_sse2x2, -#ifdef CONFIG_AS_AVX2 - &raid6_avx2x1, - &raid6_avx2x2, -#endif #ifdef CONFIG_AS_AVX512 - &raid6_avx512x1, &raid6_avx512x2, + &raid6_avx512x1, #endif -#endif -#if defined(__x86_64__) && !defined(__arch_um__) - &raid6_sse2x1, - &raid6_sse2x2, - &raid6_sse2x4, #ifdef CONFIG_AS_AVX2 - &raid6_avx2x1, &raid6_avx2x2, - &raid6_avx2x4, + &raid6_avx2x1, +#endif + &raid6_sse2x2, + &raid6_sse2x1, + &raid6_sse1x2, + &raid6_sse1x1, + &raid6_mmxx2, + &raid6_mmxx1, #endif +#if defined(__x86_64__) && !defined(__arch_um__) #ifdef CONFIG_AS_AVX512 - &raid6_avx512x1, - &raid6_avx512x2, &raid6_avx512x4, + &raid6_avx512x2, + &raid6_avx512x1, #endif +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x4, + &raid6_avx2x2, + &raid6_avx2x1, +#endif + &raid6_sse2x4, + &raid6_sse2x2, + &raid6_sse2x1, #endif #ifdef CONFIG_ALTIVEC - &raid6_altivec1, - &raid6_altivec2, - &raid6_altivec4, - &raid6_altivec8, - &raid6_vpermxor1, - &raid6_vpermxor2, - &raid6_vpermxor4, &raid6_vpermxor8, + &raid6_vpermxor4, + &raid6_vpermxor2, + &raid6_vpermxor1, + &raid6_altivec8, + &raid6_altivec4, + &raid6_altivec2, + &raid6_altivec1, #endif #if defined(CONFIG_S390) &raid6_s390vx8, #endif - &raid6_intx1, - &raid6_intx2, - &raid6_intx4, - &raid6_intx8, #ifdef CONFIG_KERNEL_MODE_NEON - &raid6_neonx1, - &raid6_neonx2, - &raid6_neonx4, &raid6_neonx8, + &raid6_neonx4, + &raid6_neonx2, + &raid6_neonx1, #endif +#if defined(__ia64__) + &raid6_intx32, + &raid6_intx16, +#endif + &raid6_intx8, + &raid6_intx4, + &raid6_intx2, + &raid6_intx1, NULL }; @@ -163,6 +163,11 @@ static inline const struct raid6_calls *raid6_choose_gen( if ((*algo)->valid && !(*algo)->valid()) continue; + if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + best = *algo; + break; + } + perf = 0; preempt_disable(); diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index d5242f544551..b7c68030da4f 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a..f13c07f82297 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include <arm_neon.h> -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, px = veorq_u8(vld1q_u8(p), vld1q_u8(dp)); vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); qx = veorq_u8(vx, vy); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); + vy = vshrq_n_u8(px, 4); vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); - vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(pm1, vy); vx = veorq_u8(vx, vy); db = veorq_u8(vx, qx); @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { @@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); vx = veorq_u8(vx, vy); vy = veorq_u8(vx, vld1q_u8(p)); diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 79777645cac9..3ab8720aa2f8 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -34,6 +34,9 @@ endif ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o + CFLAGS += $(shell echo "pshufb %xmm0, %xmm0" | \ + gcc -c -x assembler - >&/dev/null && \ + rm ./-.o && echo -DCONFIG_AS_SSSE3=1) CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX2=1) |