author      Martin Willi <martin@strongswan.org>       2018-11-11 12:36:28 +0300
committer   Herbert Xu <herbert@gondor.apana.org.au>   2018-11-16 09:11:04 +0300
commit      9b17608f15b940babe2e32522ea29787abd10af2 (patch)
tree        9ff7634d0944867d930ac1ad775dc1b9b9cf42d3
parent      c3b734dd325dadc73c2f5b4d187208730bf21df5 (diff)
download    linux-9b17608f15b940babe2e32522ea29787abd10af2.tar.xz
crypto: x86/chacha20 - Use larger block functions more aggressively
Now that all block functions support partial lengths, engage the wider block sizes more aggressively. This prevents using smaller block functions multiple times, where the next larger block function would have been faster.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--    arch/x86/crypto/chacha20_glue.c    39
1 file changed, 24 insertions(+), 15 deletions(-)
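The heart of the patch is the chacha20_advance() helper added below: it turns a possibly partial byte count into the number of 64-byte counter increments a wide block function consumes. What follows is a minimal userspace sketch of that arithmetic, with the kernel's min() and round_up() macros re-created in plain C; round_up_p2() and the main() driver are illustrative only, not part of the patch.

#include <stdio.h>

#define CHACHA20_BLOCK_SIZE 64

/* power-of-two round_up(), standing in for the kernel macro */
static unsigned int round_up_p2(unsigned int n, unsigned int m)
{
        return (n + m - 1) & ~(m - 1);
}

static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
{
        /* min(): one wide call never consumes more than maxblocks blocks */
        if (len > maxblocks * CHACHA20_BLOCK_SIZE)
                len = maxblocks * CHACHA20_BLOCK_SIZE;
        /* a trailing partial block still burns one counter value */
        return round_up_p2(len, CHACHA20_BLOCK_SIZE) / CHACHA20_BLOCK_SIZE;
}

int main(void)
{
        printf("%u\n", chacha20_advance(320, 8));  /* 5: five blocks of 64 */
        printf("%u\n", chacha20_advance(100, 4));  /* 2: one full + one partial */
        return 0;
}

The length is rounded up, not down, because a partial trailing block still consumes a full counter value in the keystream.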
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 882e8bf5965a..b541da71f11e 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -29,6 +29,12 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 static bool chacha20_use_avx2;
 #endif
 
+static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
+{
+        len = min(len, maxblocks * CHACHA20_BLOCK_SIZE);
+        return round_up(len, CHACHA20_BLOCK_SIZE) / CHACHA20_BLOCK_SIZE;
+}
+
 static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
                             unsigned int bytes)
 {
@@ -41,6 +47,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
                         dst += CHACHA20_BLOCK_SIZE * 8;
                         state[12] += 8;
                 }
+                if (bytes > CHACHA20_BLOCK_SIZE * 4) {
+                        chacha20_8block_xor_avx2(state, dst, src, bytes);
+                        state[12] += chacha20_advance(bytes, 8);
+                        return;
+                }
         }
 #endif
         while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
@@ -50,15 +61,14 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
                 dst += CHACHA20_BLOCK_SIZE * 4;
                 state[12] += 4;
         }
-        while (bytes >= CHACHA20_BLOCK_SIZE) {
-                chacha20_block_xor_ssse3(state, dst, src, bytes);
-                bytes -= CHACHA20_BLOCK_SIZE;
-                src += CHACHA20_BLOCK_SIZE;
-                dst += CHACHA20_BLOCK_SIZE;
-                state[12]++;
+        if (bytes > CHACHA20_BLOCK_SIZE) {
+                chacha20_4block_xor_ssse3(state, dst, src, bytes);
+                state[12] += chacha20_advance(bytes, 4);
+                return;
         }
         if (bytes) {
                 chacha20_block_xor_ssse3(state, dst, src, bytes);
+                state[12]++;
         }
 }
 
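To see what "more aggressively" buys, consider the tail handling in chacha20_dosimd() above: any remainder larger than four blocks now goes to one partial 8-block call, and anything larger than one block to one partial 4-block call. Below is a hypothetical call-counting model of that dispatch (assuming AVX2 is usable; calls_new() and BLK are invented for this sketch and perform no crypto).

#include <stdio.h>

#define BLK 64  /* CHACHA20_BLOCK_SIZE */

/* counts SIMD calls the new dispatch would make for one request */
static unsigned int calls_new(unsigned int bytes)
{
        unsigned int calls = 0;

        while (bytes >= BLK * 8) {      /* full-width 8-block passes */
                bytes -= BLK * 8;
                calls++;
        }
        if (bytes > BLK * 4)            /* one partial 8-block pass */
                return calls + 1;
        while (bytes >= BLK * 4) {      /* full 4-block pass */
                bytes -= BLK * 4;
                calls++;
        }
        if (bytes > BLK)                /* one partial 4-block pass */
                return calls + 1;
        return calls + (bytes != 0);    /* final single block, if any */
}

int main(void)
{
        printf("%u\n", calls_new(448));  /* 1 call */
        printf("%u\n", calls_new(1000)); /* 2 calls */
        return 0;
}

For a 448-byte remainder the old tail loops issued one 4-block call plus three 1-block calls; the new code issues a single partial 8-block call.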
@@ -82,17 +92,16 @@ static int chacha20_simd(struct skcipher_request *req)
 
         kernel_fpu_begin();
 
-        while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
-                chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-                                rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
-                err = skcipher_walk_done(&walk,
-                                         walk.nbytes % CHACHA20_BLOCK_SIZE);
-        }
+        while (walk.nbytes > 0) {
+                unsigned int nbytes = walk.nbytes;
 
-        if (walk.nbytes) {
+                if (nbytes < walk.total)
+                        nbytes = round_down(nbytes, walk.stride);
+
                 chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-                                walk.nbytes);
-                err = skcipher_walk_done(&walk, 0);
+                                nbytes);
+
+                err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
         }
 
         kernel_fpu_end();
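In chacha20_simd(), the walk loop now accepts arbitrary chunk sizes: intermediate chunks are rounded down to walk.stride and the remainder is handed back to the walk, while the final chunk may be any length since chacha20_dosimd() copes with partial blocks. A standalone sketch of that rounding follows; round_down() here is the power-of-two equivalent of the kernel helper, and the sample numbers are invented.

#include <stdio.h>

/* power-of-two round_down(), equivalent to the kernel helper */
#define round_down(x, y) ((x) & ~((y) - 1))

int main(void)
{
        unsigned int stride = 64;       /* walk.stride: one ChaCha20 block */
        unsigned int total  = 1000;     /* walk.total: whole request */
        unsigned int nbytes = 300;      /* walk.nbytes: bytes mapped now */

        /* intermediate chunk: stay block-aligned, hand the tail back */
        if (nbytes < total)
                nbytes = round_down(nbytes, stride);

        printf("process %u, return %u to the walk\n", nbytes, 300 - nbytes);
        /* -> process 256, return 44 to the walk */
        return 0;
}

Returning walk.nbytes - nbytes to skcipher_walk_done() keeps every chunk but the last block-aligned, so the counter arithmetic in chacha20_dosimd() stays consistent across chunks.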