UPSTREAM: crypto: x86/chacha - expose SIMD ChaCha routine as library function
Wire the existing x86 SIMD ChaCha code into the new ChaCha library interface, so that users of the library interface will get the accelerated version when available.

Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations).

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
(cherry picked from commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1)
Bug: 152722841
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I888d7c807c2c1227195a924895ec68c0377b1771
This commit is contained in:
parent
c2674ee0d9
commit
0a524ae566
3 changed files with 72 additions and 24 deletions
|
@ -25,24 +25,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
||||||
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
|
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
|
||||||
#ifdef CONFIG_AS_AVX2
|
|
||||||
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
static bool chacha_use_avx2;
|
|
||||||
#ifdef CONFIG_AS_AVX512
|
|
||||||
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int len, int nrounds);
|
unsigned int len, int nrounds);
|
||||||
static bool chacha_use_avx512vl;
|
|
||||||
#endif
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
|
||||||
#endif
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
|
||||||
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
|
||||||
|
|
||||||
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
|
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
|
||||||
{
|
{
|
||||||
|
@ -53,9 +53,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
|
||||||
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int bytes, int nrounds)
|
unsigned int bytes, int nrounds)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_AS_AVX2
|
if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||||
#ifdef CONFIG_AS_AVX512
|
static_branch_likely(&chacha_use_avx512vl)) {
|
||||||
if (chacha_use_avx512vl) {
|
|
||||||
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
||||||
chacha_8block_xor_avx512vl(state, dst, src, bytes,
|
chacha_8block_xor_avx512vl(state, dst, src, bytes,
|
||||||
nrounds);
|
nrounds);
|
||||||
|
@ -83,8 +82,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
if (chacha_use_avx2) {
|
if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||||
|
static_branch_likely(&chacha_use_avx2)) {
|
||||||
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
||||||
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
|
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
|
||||||
bytes -= CHACHA_BLOCK_SIZE * 8;
|
bytes -= CHACHA_BLOCK_SIZE * 8;
|
||||||
|
@ -108,7 +108,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||||
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
|
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
|
||||||
bytes -= CHACHA_BLOCK_SIZE * 4;
|
bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||||
|
@ -127,6 +127,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * hchacha_block_arch - x86 library entry point for the HChaCha block function.
 * @state:   input ChaCha state; the local pointer is re-aligned with
 *           PTR_ALIGN(state, CHACHA_STATE_ALIGN) before it is used
 * @stream:  output buffer, handed unchanged to the selected implementation
 * @nrounds: round count, handed unchanged to the selected implementation
 *
 * Falls back to hchacha_block_generic() when the chacha_use_simd static key
 * is off (it is only enabled at module init if the boot CPU has SSSE3) or
 * when may_use_simd() returns false. Otherwise hchacha_block_ssse3() is
 * called between kernel_fpu_begin() and kernel_fpu_end().
 *
 * NOTE(review): the PTR_ALIGN() presumably relies on callers over-allocating
 * the state buffer (cf. the larger CHACHA_STATE_WORDS under CONFIG_X86_64)
 * -- confirm against the header.
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
|
||||||
|
{
|
||||||
|
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||||
|
|
||||||
|
if (!static_branch_likely(&chacha_use_simd) || !may_use_simd()) {
|
||||||
|
hchacha_block_generic(state, stream, nrounds);
|
||||||
|
} else {
|
||||||
|
kernel_fpu_begin();
|
||||||
|
hchacha_block_ssse3(state, stream, nrounds);
|
||||||
|
kernel_fpu_end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(hchacha_block_arch);
|
||||||
|
|
||||||
|
/*
 * chacha_init_arch - x86 library entry point for ChaCha state initialisation.
 * @state: state buffer; the local pointer is re-aligned with
 *         PTR_ALIGN(state, CHACHA_STATE_ALIGN) before it is written
 * @key:   key words, passed unchanged to chacha_init_generic()
 * @iv:    IV bytes, passed unchanged to chacha_init_generic()
 *
 * No SIMD path is involved: after aligning the pointer this simply delegates
 * to chacha_init_generic(), so the state is laid out at the same aligned
 * address that the other *_arch() routines in this file compute.
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||||
|
{
|
||||||
|
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||||
|
|
||||||
|
chacha_init_generic(state, key, iv);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(chacha_init_arch);
|
||||||
|
|
||||||
|
/*
 * chacha_crypt_arch - x86 library entry point for ChaCha encryption/decryption.
 * @state:   ChaCha state; the local pointer is re-aligned with
 *           PTR_ALIGN(state, CHACHA_STATE_ALIGN) before it is used
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: round count, passed unchanged to the selected implementation
 *
 * chacha_crypt_generic() handles the request when the chacha_use_simd static
 * key is off, when may_use_simd() returns false, or when the request is no
 * larger than one block (bytes <= CHACHA_BLOCK_SIZE). Otherwise the whole
 * request is passed to chacha_dosimd() inside a single
 * kernel_fpu_begin()/kernel_fpu_end() section.
 *
 * NOTE(review): the SIMD path is not split into chunks, so the FPU section
 * lasts for the entire @bytes length -- confirm that callers bound the
 * request size if latency matters.
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
|
||||||
|
int nrounds)
|
||||||
|
{
|
||||||
|
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||||
|
|
||||||
|
if (!static_branch_likely(&chacha_use_simd) || !may_use_simd() ||
|
||||||
|
bytes <= CHACHA_BLOCK_SIZE)
|
||||||
|
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||||
|
|
||||||
|
kernel_fpu_begin();
|
||||||
|
chacha_dosimd(state, dst, src, bytes, nrounds);
|
||||||
|
kernel_fpu_end();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(chacha_crypt_arch);
|
||||||
|
|
||||||
static int chacha_simd_stream_xor(struct skcipher_request *req,
|
static int chacha_simd_stream_xor(struct skcipher_request *req,
|
||||||
const struct chacha_ctx *ctx, const u8 *iv)
|
const struct chacha_ctx *ctx, const u8 *iv)
|
||||||
{
|
{
|
||||||
|
@ -147,7 +184,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
|
||||||
if (nbytes < walk.total)
|
if (nbytes < walk.total)
|
||||||
nbytes = round_down(nbytes, walk.stride);
|
nbytes = round_down(nbytes, walk.stride);
|
||||||
|
|
||||||
if (!may_use_simd()) {
|
if (!static_branch_likely(&chacha_use_simd) ||
|
||||||
|
!may_use_simd()) {
|
||||||
chacha_crypt_generic(state, walk.dst.virt.addr,
|
chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||||
walk.src.virt.addr, nbytes,
|
walk.src.virt.addr, nbytes,
|
||||||
ctx->nrounds);
|
ctx->nrounds);
|
||||||
|
@ -250,18 +288,21 @@ static struct skcipher_alg algs[] = {
|
||||||
static int __init chacha_simd_mod_init(void)
|
static int __init chacha_simd_mod_init(void)
|
||||||
{
|
{
|
||||||
if (!boot_cpu_has(X86_FEATURE_SSSE3))
|
if (!boot_cpu_has(X86_FEATURE_SSSE3))
|
||||||
return -ENODEV;
|
return 0;
|
||||||
|
|
||||||
#ifdef CONFIG_AS_AVX2
|
static_branch_enable(&chacha_use_simd);
|
||||||
chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
|
|
||||||
|
if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||||
|
boot_cpu_has(X86_FEATURE_AVX) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX2) &&
|
boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
|
||||||
#ifdef CONFIG_AS_AVX512
|
static_branch_enable(&chacha_use_avx2);
|
||||||
chacha_use_avx512vl = chacha_use_avx2 &&
|
|
||||||
|
if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512VL) &&
|
boot_cpu_has(X86_FEATURE_AVX512VL) &&
|
||||||
boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
|
boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
|
||||||
#endif
|
static_branch_enable(&chacha_use_avx512vl);
|
||||||
#endif
|
}
|
||||||
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1492,6 +1492,7 @@ config CRYPTO_CHACHA20_X86_64
|
||||||
depends on X86 && 64BIT
|
depends on X86 && 64BIT
|
||||||
select CRYPTO_BLKCIPHER
|
select CRYPTO_BLKCIPHER
|
||||||
select CRYPTO_LIB_CHACHA_GENERIC
|
select CRYPTO_LIB_CHACHA_GENERIC
|
||||||
|
select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||||
help
|
help
|
||||||
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
|
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
|
||||||
XChaCha20, and XChaCha12 stream ciphers.
|
XChaCha20, and XChaCha12 stream ciphers.
|
||||||
|
|
|
@ -24,6 +24,12 @@
|
||||||
#define CHACHA_KEY_SIZE 32
|
#define CHACHA_KEY_SIZE 32
|
||||||
#define CHACHA_BLOCK_SIZE 64
|
#define CHACHA_BLOCK_SIZE 64
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
|
||||||
|
#else
|
||||||
|
#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
|
||||||
|
#endif
|
||||||
|
|
||||||
/* 192-bit nonce, then 64-bit stream position */
|
/* 192-bit nonce, then 64-bit stream position */
|
||||||
#define XCHACHA_IV_SIZE 32
|
#define XCHACHA_IV_SIZE 32
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue