diff options
author | Martin Willi | 2018-11-20 17:30:49 +0100 |
---|---|---|
committer | Herbert Xu | 2018-11-29 09:27:04 +0100 |
commit | 29a47b54e030efe308aa90e6c26a9ce7f5f84ed8 (patch) | |
tree | b53c29fb2903d8d4f62afb9804cb10e6643c7034 /arch/x86/crypto/chacha20_glue.c | |
parent | crypto: x86/chacha20 - Add a 8-block AVX-512VL variant (diff) | |
download | kernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.tar.gz kernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.tar.xz kernel-qcow2-linux-29a47b54e030efe308aa90e6c26a9ce7f5f84ed8.zip |
crypto: x86/chacha20 - Add a 2-block AVX-512VL variant
This version uses the same principle as the AVX2 version. It benefits
from the AVX-512VL rotate instructions and the more efficient partial
block handling using "vmovdqu8", resulting in a speedup of ~20%.
Unlike the AVX2 version, it is faster than the single-block SSSE3 version
at processing a single block. Hence we use the 2-block AVX-512VL function
for (partial) single-block lengths as well.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/chacha20_glue.c')
-rw-r--r-- | arch/x86/crypto/chacha20_glue.c | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 6a67e70bc82a..d6a95a6a324e 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -32,6 +32,8 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len); static bool chacha20_use_avx2; #ifdef CONFIG_AS_AVX512 +asmlinkage void chacha20_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len); asmlinkage void chacha20_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len); static bool chacha20_use_avx512vl; @@ -62,6 +64,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, state[12] += chacha20_advance(bytes, 8); return; } + if (bytes) { + chacha20_2block_xor_avx512vl(state, dst, src, bytes); + state[12] += chacha20_advance(bytes, 2); + return; + } } #endif if (chacha20_use_avx2) { |