UPSTREAM: crypto: x86/chacha20 - Use larger block functions more aggressively
Now that all block functions support partial lengths, engage the wider block sizes more aggressively. This prevents using smaller block functions multiple times, where the next larger block function would have been faster. Signed-off-by: Martin Willi <martin@strongswan.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> (cherry picked from commit 9b17608f15b940babe2e32522ea29787abd10af2) Bug: 152722841 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ic093ecab923675f28bedc3989292ac70f53338d7
This commit is contained in:
parent
5057483342
commit
26b2d7b5d4
1 changed file with 24 additions and 15 deletions
|
@ -29,6 +29,12 @@ asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
|||
static bool chacha20_use_avx2;
|
||||
#endif
|
||||
|
||||
static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
|
||||
{
|
||||
len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
|
||||
return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes)
|
||||
{
|
||||
|
@ -41,6 +47,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
|
|||
dst += CHACHA_BLOCK_SIZE * 8;
|
||||
state[12] += 8;
|
||||
}
|
||||
if (bytes > CHACHA_BLOCK_SIZE * 4) {
|
||||
chacha20_8block_xor_avx2(state, dst, src, bytes);
|
||||
state[12] += chacha20_advance(bytes, 8);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
|
@ -50,15 +61,14 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
|
|||
dst += CHACHA_BLOCK_SIZE * 4;
|
||||
state[12] += 4;
|
||||
}
|
||||
while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
chacha20_block_xor_ssse3(state, dst, src, bytes);
|
||||
bytes -= CHACHA_BLOCK_SIZE;
|
||||
src += CHACHA_BLOCK_SIZE;
|
||||
dst += CHACHA_BLOCK_SIZE;
|
||||
state[12]++;
|
||||
if (bytes > CHACHA_BLOCK_SIZE) {
|
||||
chacha20_4block_xor_ssse3(state, dst, src, bytes);
|
||||
state[12] += chacha20_advance(bytes, 4);
|
||||
return;
|
||||
}
|
||||
if (bytes) {
|
||||
chacha20_block_xor_ssse3(state, dst, src, bytes);
|
||||
state[12]++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -82,17 +92,16 @@ static int chacha20_simd(struct skcipher_request *req)
|
|||
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (walk.nbytes >= CHACHA_BLOCK_SIZE) {
|
||||
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
rounddown(walk.nbytes, CHACHA_BLOCK_SIZE));
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % CHACHA_BLOCK_SIZE);
|
||||
}
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
if (walk.nbytes) {
|
||||
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
walk.nbytes);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
nbytes);
|
||||
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
kernel_fpu_end();
|
||||
|
|
Loading…
Reference in a new issue