crypto: aesni - AVX and AVX2 version of AESNI-GCM encode and decode
We have added AVX and AVX2 routines that optimize AESNI-GCM encode/decode. These routines are optimized for encrypt and decrypt of large buffers. In tests we have seen up to 6% speedup for 1K, 11% speedup for 2K and 18% speedup for 8K buffer over the existing SSE version. These routines should provide even better speedup for future Intel x86_64 cpus. Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
fed286110f
commit
d764593af9
3 changed files with 2953 additions and 3 deletions
|
@ -75,7 +75,7 @@ ifeq ($(avx2_supported),yes)
|
|||
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_avx.o aesni-intel_glue.o fpu.o
|
||||
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
||||
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
||||
crc32c-intel-y := crc32c-intel_glue.o
|
||||
|
|
2811
arch/x86/crypto/aesni-intel_avx.S
Normal file
2811
arch/x86/crypto/aesni-intel_avx.S
Normal file
File diff suppressed because it is too large
Load diff
|
@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
|
|||
int crypto_fpu_init(void);
|
||||
void crypto_fpu_exit(void);
|
||||
|
||||
#define AVX_GEN2_OPTSIZE 640
|
||||
#define AVX_GEN4_OPTSIZE 4096
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
const u8 *in, unsigned int len, u8 *iv);
|
||||
|
@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
|
|||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
|
||||
#ifdef CONFIG_AS_AVX
|
||||
/*
|
||||
* asmlinkage void aesni_gcm_precomp_avx_gen2()
|
||||
* gcm_data *my_ctx_data, context data
|
||||
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
|
||||
*/
|
||||
asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey);
|
||||
|
||||
asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long plaintext_len, u8 *iv,
|
||||
const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long ciphertext_len, u8 *iv,
|
||||
const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
static void aesni_gcm_enc_avx(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long plaintext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
if (plaintext_len < AVX_GEN2_OPTSIZE) {
|
||||
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
|
||||
aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
}
|
||||
}
|
||||
|
||||
static void aesni_gcm_dec_avx(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long ciphertext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
if (ciphertext_len < AVX_GEN2_OPTSIZE) {
|
||||
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
|
||||
aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AS_AVX2
|
||||
/*
|
||||
* asmlinkage void aesni_gcm_precomp_avx_gen4()
|
||||
* gcm_data *my_ctx_data, context data
|
||||
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
|
||||
*/
|
||||
asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey);
|
||||
|
||||
asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long plaintext_len, u8 *iv,
|
||||
const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long ciphertext_len, u8 *iv,
|
||||
const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long plaintext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
if (plaintext_len < AVX_GEN2_OPTSIZE) {
|
||||
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
|
||||
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
|
||||
aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
|
||||
aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
}
|
||||
}
|
||||
|
||||
static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long ciphertext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
if (ciphertext_len < AVX_GEN2_OPTSIZE) {
|
||||
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
|
||||
aad, aad_len, auth_tag, auth_tag_len);
|
||||
} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
|
||||
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
|
||||
aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
|
||||
aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long plaintext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
|
||||
const u8 *in, unsigned long ciphertext_len, u8 *iv,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len);
|
||||
|
||||
static inline struct
|
||||
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
|
||||
{
|
||||
|
@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
|
|||
dst = src;
|
||||
}
|
||||
|
||||
aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
|
||||
aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
|
||||
ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
|
||||
+ ((unsigned long)req->cryptlen), auth_tag_len);
|
||||
|
||||
|
@ -996,7 +1116,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
|
|||
dst = src;
|
||||
}
|
||||
|
||||
aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv,
|
||||
aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
|
||||
ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
|
||||
authTag, auth_tag_len);
|
||||
|
||||
|
@ -1353,6 +1473,25 @@ static int __init aesni_init(void)
|
|||
|
||||
if (!x86_match_cpu(aesni_cpu_id))
|
||||
return -ENODEV;
|
||||
#ifdef CONFIG_AS_AVX2
|
||||
if (boot_cpu_has(X86_FEATURE_AVX2)) {
|
||||
pr_info("AVX2 version of gcm_enc/dec engaged.\n");
|
||||
aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
|
||||
aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
|
||||
} else
|
||||
#endif
|
||||
#ifdef CONFIG_AS_AVX
|
||||
if (boot_cpu_has(X86_FEATURE_AVX)) {
|
||||
pr_info("AVX version of gcm_enc/dec engaged.\n");
|
||||
aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
|
||||
aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
pr_info("SSE version of gcm_enc/dec engaged.\n");
|
||||
aesni_gcm_enc_tfm = aesni_gcm_enc;
|
||||
aesni_gcm_dec_tfm = aesni_gcm_dec;
|
||||
}
|
||||
|
||||
err = crypto_fpu_init();
|
||||
if (err)
|
||||
|
|
Loading…
Reference in a new issue