crypto: x86/chacha-ssse3 - use unaligned loads for state array

Due to the fact that the x86 port does not support allocating objects on the stack with an alignment that exceeds 8 bytes, we have a rather ugly hack in the x86 code for ChaCha to ensure that the state array is aligned to 16 bytes, allowing the SSSE3 implementation of the algorithm to use aligned loads. Given that the performance benefit of using aligned loads appears to be limited (~0.25% for 1k blocks using tcrypt on a Core i7-8650U), and the fact that this hack has leaked into generic ChaCha code, let's just remove it.

Cc: Martin Willi <martin@strongswan.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Martin Willi <martin@strongswan.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Ard Biesheuvel <ardb@kernel.org> 2020-07-08 12:11:18 +0300
committer: Herbert Xu <herbert@gondor.apana.org.au> 2020-07-16 14:49:04 +0300
commit: e79a31715193686e92dadb4caedfbb1f5de3659c (patch)
tree: fe9d960b52083a72242d8c1a5ecbd0db8b90ff3d /arch/x86/crypto/chacha-ssse3-x86_64.S
parent: 06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac (diff)
download: linux-e79a31715193686e92dadb4caedfbb1f5de3659c.tar.xz
1 files changed, 8 insertions, 8 deletions
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index a38ab2512a6f..ca1788bfee16 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -120,10 +120,10 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
 	FRAME_BEGIN
 
 	# x0..3 = s0..3
-	movdqa		0x00(%rdi),%xmm0
-	movdqa		0x10(%rdi),%xmm1
-	movdqa		0x20(%rdi),%xmm2
-	movdqa		0x30(%rdi),%xmm3
+	movdqu		0x00(%rdi),%xmm0
+	movdqu		0x10(%rdi),%xmm1
+	movdqu		0x20(%rdi),%xmm2
+	movdqu		0x30(%rdi),%xmm3
 	movdqa		%xmm0,%xmm8
 	movdqa		%xmm1,%xmm9
 	movdqa		%xmm2,%xmm10
@@ -205,10 +205,10 @@ SYM_FUNC_START(hchacha_block_ssse3)
 	# %edx: nrounds
 	FRAME_BEGIN
 
-	movdqa		0x00(%rdi),%xmm0
-	movdqa		0x10(%rdi),%xmm1
-	movdqa		0x20(%rdi),%xmm2
-	movdqa		0x30(%rdi),%xmm3
+	movdqu		0x00(%rdi),%xmm0
+	movdqu		0x10(%rdi),%xmm1
+	movdqu		0x20(%rdi),%xmm2
+	movdqu		0x30(%rdi),%xmm3
 
 	mov		%edx,%r8d
 	call		chacha_permute
author	Ard Biesheuvel <ardb@kernel.org>	2020-07-08 12:11:18 +0300
committer	Herbert Xu <herbert@gondor.apana.org.au>	2020-07-16 14:49:04 +0300
commit	e79a31715193686e92dadb4caedfbb1f5de3659c (patch)
tree	fe9d960b52083a72242d8c1a5ecbd0db8b90ff3d /arch/x86/crypto/chacha-ssse3-x86_64.S
parent	06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac (diff)
download	linux-e79a31715193686e92dadb4caedfbb1f5de3659c.tar.xz