8eee8e5f44
Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. 
As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Martin Willi <martin@strongswan.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> (cherry picked from commit aa7624093cb7fbf4fea95e612580d8d29a819f67 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I876a5be92e9f583effcd35a4b66a36608ac581f0 Signed-off-by: Eric Biggers <ebiggers@google.com>
117 lines
3.8 KiB
C
117 lines
3.8 KiB
C
/*
 * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/cryptohash.h>
|
|
#include <asm/unaligned.h>
|
|
#include <crypto/chacha.h>
|
|
|
|
/*
 * Apply one quarter-round to the four state words x[a], x[b], x[c] and x[d].
 *
 * The step is an add / xor / rotate-left sequence repeated four times, with
 * rotation amounts 16, 12, 8 and 7 in that order.
 */
static inline void chacha_quarter_round(u32 *x, int a, int b, int c, int d)
{
	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 16);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 12);

	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 8);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 7);
}

/*
 * Run the ChaCha permutation in place on the 16-word state @x.
 *
 * Every loop iteration is a "double round": four quarter-rounds over the
 * columns of the 4x4 word matrix, then four over its diagonals.  The round
 * counter therefore advances by two per iteration.
 */
static void chacha_permute(u32 *x, int nrounds)
{
	int round;

	/* only 20 and 12 rounds are expected; anything else warns (once) */
	WARN_ON_ONCE(!(nrounds == 20 || nrounds == 12));

	for (round = 0; round < nrounds; round += 2) {
		/*
		 * Column round.  The four quarter-rounds touch disjoint
		 * words, so running them one after another is equivalent
		 * to interleaving their individual steps.
		 */
		chacha_quarter_round(x, 0, 4,  8, 12);
		chacha_quarter_round(x, 1, 5,  9, 13);
		chacha_quarter_round(x, 2, 6, 10, 14);
		chacha_quarter_round(x, 3, 7, 11, 15);

		/* Diagonal round (again over four disjoint word sets). */
		chacha_quarter_round(x, 0, 5, 10, 15);
		chacha_quarter_round(x, 1, 6, 11, 12);
		chacha_quarter_round(x, 2, 7,  8, 13);
		chacha_quarter_round(x, 3, 4,  9, 14);
	}
}
|
|
|
|
/**
|
|
* chacha_block - generate one keystream block and increment block counter
|
|
* @state: input state matrix (16 32-bit words)
|
|
* @stream: output keystream block (64 bytes)
|
|
* @nrounds: number of rounds (20 or 12; 20 is recommended)
|
|
*
|
|
* This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
|
|
* The caller has already converted the endianness of the input. This function
|
|
* also handles incrementing the block counter in the input matrix.
|
|
*/
|
|
void chacha_block(u32 *state, u8 *stream, int nrounds)
|
|
{
|
|
u32 x[16];
|
|
int i;
|
|
|
|
memcpy(x, state, 64);
|
|
|
|
chacha_permute(x, nrounds);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(x); i++)
|
|
put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
|
|
|
|
state[12]++;
|
|
}
|
|
EXPORT_SYMBOL(chacha_block);
|
|
|
|
/**
|
|
* hchacha_block - abbreviated ChaCha core, for XChaCha
|
|
* @in: input state matrix (16 32-bit words)
|
|
* @out: output (8 32-bit words)
|
|
* @nrounds: number of rounds (20 or 12; 20 is recommended)
|
|
*
|
|
* HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
|
|
* towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
|
|
* skips the final addition of the initial state, and outputs only certain words
|
|
* of the state. It should not be used for streaming directly.
|
|
*/
|
|
void hchacha_block(const u32 *in, u32 *out, int nrounds)
|
|
{
|
|
u32 x[16];
|
|
|
|
memcpy(x, in, 64);
|
|
|
|
chacha_permute(x, nrounds);
|
|
|
|
memcpy(&out[0], &x[0], 16);
|
|
memcpy(&out[4], &x[12], 16);
|
|
}
|
|
EXPORT_SYMBOL(hchacha_block);
|