0b95ec56ae
Patch adds x86_64 assembler implementation of Camellia block cipher. Two set of functions are provided. First set is regular 'one-block at time' encrypt/decrypt functions. Second is 'two-block at time' functions that gain performance increase on out-of-order CPUs. Performance of 2-way functions should be equal to 1-way functions with in-order CPUs. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: AMD Phenom II 1055T (fam:16, model:10): camellia-asm vs camellia_generic: 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.27x 1.22x 1.30x 1.42x 1.30x 1.34x 1.19x 1.05x 1.23x 1.24x 64B 1.74x 1.79x 1.43x 1.87x 1.81x 1.87x 1.48x 1.38x 1.55x 1.62x 256B 1.90x 1.87x 1.43x 1.94x 1.94x 1.95x 1.63x 1.62x 1.67x 1.70x 1024B 1.96x 1.93x 1.43x 1.95x 1.98x 2.01x 1.67x 1.69x 1.74x 1.80x 8192B 1.96x 1.96x 1.39x 1.93x 2.01x 2.03x 1.72x 1.64x 1.71x 1.76x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.23x 1.23x 1.33x 1.39x 1.34x 1.38x 1.04x 1.18x 1.21x 1.29x 64B 1.72x 1.69x 1.42x 1.78x 1.81x 1.89x 1.57x 1.52x 1.56x 1.65x 256B 1.85x 1.88x 1.42x 1.86x 1.93x 1.96x 1.69x 1.65x 1.70x 1.75x 1024B 1.88x 1.86x 1.45x 1.95x 1.96x 1.95x 1.77x 1.71x 1.77x 1.78x 8192B 1.91x 1.86x 1.42x 1.91x 2.03x 1.98x 1.73x 1.71x 1.78x 1.76x camellia-asm vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.15x 1.22x ecb-dec 1.16x 1.16x cbc-enc 0.85x 0.90x cbc-dec 1.20x 1.23x ctr-enc 1.28x 1.30x ctr-dec 1.27x 1.28x lrw-enc 1.12x 1.16x lrw-dec 1.08x 1.10x xts-enc 1.11x 1.15x xts-dec 1.14x 1.15x Intel Core2 T8100 (fam:6, model:23, step:6): camellia-asm vs camellia_generic: 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.10x 1.12x 1.14x 1.16x 1.16x 1.15x 1.02x 1.02x 1.08x 1.08x 64B 1.61x 1.60x 1.17x 1.68x 1.67x 1.66x 1.43x 1.42x 1.44x 1.42x 256B 1.65x 1.73x 1.17x 1.77x 1.81x 1.80x 1.54x 1.53x 1.58x 1.54x 1024B 1.76x 1.74x 1.18x 1.80x 1.85x 1.85x 1.60x 1.59x 1.65x 1.60x 8192B 1.77x 1.75x 1.19x 1.81x 1.85x 1.86x 1.63x 1.61x 1.66x 1.62x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 1.10x 1.07x 1.13x 1.16x 1.11x 1.16x 1.03x 1.02x 1.08x 1.07x 64B 1.61x 1.62x 1.15x 1.66x 1.63x 1.68x 1.47x 1.46x 1.47x 1.44x 256B 1.71x 1.70x 1.16x 1.75x 1.69x 1.79x 1.58x 1.57x 1.59x 1.55x 1024B 1.78x 1.72x 1.17x 1.75x 1.80x 1.80x 1.63x 1.62x 1.65x 1.62x 8192B 1.76x 1.73x 1.17x 1.78x 1.80x 1.81x 1.64x 1.62x 1.68x 1.64x camellia-asm vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.17x 1.21x ecb-dec 1.17x 1.20x cbc-enc 0.80x 0.82x cbc-dec 1.22x 1.24x ctr-enc 1.25x 1.26x ctr-dec 1.25x 1.26x lrw-enc 1.14x 1.18x lrw-dec 1.13x 1.17x xts-enc 1.14x 1.18x xts-dec 1.14x 1.17x Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
45 lines
1.9 KiB
Makefile
45 lines
1.9 KiB
Makefile
#
|
|
# Arch-specific CryptoAPI modules.
|
|
#
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
|
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
|
|
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
|
|
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
|
|
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
|
|
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
|
|
|
|
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
|
|
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
|
|
|
|
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
|
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
|
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
|
|
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
|
|
|
|
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
|
|
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
|
|
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
|
|
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
|
|
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
|
|
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
|
|
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
|
|
|
|
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
|
|
|
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
|
|
|
# enable AVX support only when $(AS) can actually assemble the instructions
|
|
ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
|
|
AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
|
|
CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
|
|
endif
|
|
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|