summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/crypto/chacha20_glue.c
diff options
context:
space:
mode:
author: Martin Willi, 2018-11-20 17:30:50 +0100
committer: Herbert Xu, 2018-11-29 09:27:04 +0100
commit: 180def6c4ad139ae6f97953ae810092ace295d5b (patch)
tree: ea9451b8ed9a9da6adac4ed41c2eab0769e4ccf1 /arch/x86/crypto/chacha20_glue.c
parent: crypto: x86/chacha20 - Add a 2-block AVX-512VL variant (diff)
download: kernel-qcow2-linux-180def6c4ad139ae6f97953ae810092ace295d5b.tar.gz
kernel-qcow2-linux-180def6c4ad139ae6f97953ae810092ace295d5b.tar.xz
kernel-qcow2-linux-180def6c4ad139ae6f97953ae810092ace295d5b.zip
crypto: x86/chacha20 - Add a 4-block AVX-512VL variant
This version uses the same principle as the AVX2 version by scheduling the operations for two block pairs in parallel. It benefits from the AVX-512VL rotate instructions and the more efficient partial block handling using "vmovdqu8", resulting in a speedup of the raw block function of ~20%.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/chacha20_glue.c')
-rw-r--r-- arch/x86/crypto/chacha20_glue.c | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index d6a95a6a324e..773d075a1483 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -34,6 +34,8 @@ static bool chacha20_use_avx2;
#ifdef CONFIG_AS_AVX512
asmlinkage void chacha20_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
+asmlinkage void chacha20_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len);
asmlinkage void chacha20_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
static bool chacha20_use_avx512vl;
@@ -64,6 +66,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
state[12] += chacha20_advance(bytes, 8);
return;
}
+ if (bytes > CHACHA_BLOCK_SIZE * 2) {
+ chacha20_4block_xor_avx512vl(state, dst, src, bytes);
+ state[12] += chacha20_advance(bytes, 4);
+ return;
+ }
if (bytes) {
chacha20_2block_xor_avx512vl(state, dst, src, bytes);
state[12] += chacha20_advance(bytes, 2);