summaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/chacha20_glue.c
diff options
context:
space:
mode:
author: Martin Willi, 2018-11-20 17:30:49 +0100
committer: Herbert Xu, 2018-11-29 09:27:04 +0100
commit: 29a47b54e030efe308aa90e6c26a9ce7f5f84ed8 (patch)
tree: b53c29fb2903d8d4f62afb9804cb10e6643c7034 /arch/x86/crypto/chacha20_glue.c
parent: crypto: x86/chacha20 - Add a 8-block AVX-512VL variant (diff)
downloadkernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.tar.gz
kernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.tar.xz
kernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.zip
crypto: x86/chacha20 - Add a 2-block AVX-512VL variant
This version uses the same principle as the AVX2 version. It benefits from the AVX-512VL rotate instructions and the more efficient partial block handling using "vmovdqu8", resulting in a speedup of ~20%.

Unlike the AVX2 version, it is faster than the single block SSSE3 version to process a single block. Hence we engage that function for (partial) single block lengths as well.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/chacha20_glue.c')
-rw-r--r-- arch/x86/crypto/chacha20_glue.c 7
1 files changed, 7 insertions, 0 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 6a67e70bc82a..d6a95a6a324e 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -32,6 +32,8 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
static bool chacha20_use_avx2;
#ifdef CONFIG_AS_AVX512
+asmlinkage void chacha20_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len);
asmlinkage void chacha20_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
static bool chacha20_use_avx512vl;
@@ -62,6 +64,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
state[12] += chacha20_advance(bytes, 8);
return;
}
+ if (bytes) {
+ chacha20_2block_xor_avx512vl(state, dst, src, bytes);
+ state[12] += chacha20_advance(bytes, 2);
+ return;
+ }
}
#endif
if (chacha20_use_avx2) {