author		Ard Biesheuvel <ardb@kernel.org>	2020-07-08 12:11:18 +0300
committer	Herbert Xu <herbert@gondor.apana.org.au>	2020-07-16 14:49:04 +0300
commit		e79a31715193686e92dadb4caedfbb1f5de3659c
tree		fe9d960b52083a72242d8c1a5ecbd0db8b90ff3d
parent		06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac
crypto: x86/chacha-sse3 - use unaligned loads for state array
Due to the fact that the x86 port does not support allocating objects on the stack with an alignment that exceeds 8 bytes, we have a rather ugly hack in the x86 code for ChaCha to ensure that the state array is aligned to 16 bytes, allowing the SSE3 implementation of the algorithm to use aligned loads.

Given that the performance benefit of using aligned loads appears to be limited (~0.25% for 1k blocks using tcrypt on a Core i7-8650U), and the fact that this hack has leaked into generic ChaCha code, let's just remove it.

Cc: Martin Willi <martin@strongswan.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Martin Willi <martin@strongswan.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
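For illustration, a minimal C sketch of the kind of manual stack-alignment hack this patch makes unnecessary. This is hypothetical code, not the actual kernel implementation (which uses its own helpers); the names chacha_align_demo and state_buf are invented:

	#include <stdint.h>
	#include <string.h>

	#define CHACHA_STATE_WORDS	16	/* 16 x 32-bit words = 64 bytes */

	void chacha_align_demo(void)
	{
		/*
		 * The x86 stack only guarantees 8-byte alignment, so carve
		 * a 16-byte-aligned 64-byte window out of an over-sized
		 * buffer; 3 spare words cover the worst-case 12 bytes of
		 * padding before the aligned boundary.
		 */
		uint32_t state_buf[CHACHA_STATE_WORDS + 3];
		uintptr_t addr = (uintptr_t)state_buf;
		uint32_t *state = (uint32_t *)((addr + 15) & ~(uintptr_t)15);

		memset(state, 0, CHACHA_STATE_WORDS * sizeof(*state));
		/* 'state' is now aligned enough for movdqa-based SSE code. */
	}

With unaligned loads in the assembly, the state can simply live at its natural alignment and this pointer arithmetic disappears.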
Diffstat (limited to 'arch/x86/crypto/chacha-ssse3-x86_64.S')
-rw-r--r--	arch/x86/crypto/chacha-ssse3-x86_64.S	16
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index a38ab2512a6f..ca1788bfee16 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -120,10 +120,10 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
 	FRAME_BEGIN
 
 	# x0..3 = s0..3
-	movdqa		0x00(%rdi),%xmm0
-	movdqa		0x10(%rdi),%xmm1
-	movdqa		0x20(%rdi),%xmm2
-	movdqa		0x30(%rdi),%xmm3
+	movdqu		0x00(%rdi),%xmm0
+	movdqu		0x10(%rdi),%xmm1
+	movdqu		0x20(%rdi),%xmm2
+	movdqu		0x30(%rdi),%xmm3
 	movdqa		%xmm0,%xmm8
 	movdqa		%xmm1,%xmm9
 	movdqa		%xmm2,%xmm10
@@ -205,10 +205,10 @@ SYM_FUNC_START(hchacha_block_ssse3)
 	# %edx: nrounds
 	FRAME_BEGIN
 
-	movdqa		0x00(%rdi),%xmm0
-	movdqa		0x10(%rdi),%xmm1
-	movdqa		0x20(%rdi),%xmm2
-	movdqa		0x30(%rdi),%xmm3
+	movdqu		0x00(%rdi),%xmm0
+	movdqu		0x10(%rdi),%xmm1
+	movdqu		0x20(%rdi),%xmm2
+	movdqu		0x30(%rdi),%xmm3
 
 	mov		%edx,%r8d
 	call		chacha_permute
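The functional distinction the patch relies on: movdqu tolerates any source alignment, while movdqa raises a general-protection fault if its memory operand is not 16-byte aligned. A minimal sketch of the same distinction in C, assuming SSE2 intrinsics from <emmintrin.h>; load_chacha_state is a hypothetical name, not a kernel function:

	#include <emmintrin.h>
	#include <stdint.h>

	void load_chacha_state(const uint32_t *state)
	{
		/* _mm_loadu_si128 compiles to movdqu: any alignment works. */
		__m128i x0 = _mm_loadu_si128((const __m128i *)&state[0]);
		__m128i x1 = _mm_loadu_si128((const __m128i *)&state[4]);
		__m128i x2 = _mm_loadu_si128((const __m128i *)&state[8]);
		__m128i x3 = _mm_loadu_si128((const __m128i *)&state[12]);

		/*
		 * _mm_load_si128 compiles to movdqa and faults on a pointer
		 * that is not 16-byte aligned -- the reason the alignment
		 * hack existed before this patch.
		 */
		(void)x0; (void)x1; (void)x2; (void)x3;
	}

On recent x86 cores, movdqu on data that happens to be aligned costs about the same as movdqa, which is consistent with the ~0.25% figure quoted in the commit message.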