summaryrefslogtreecommitdiff
path: root/arch/arm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 23:18:19 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 23:18:19 +0300
commit9e4b0d55d84a66dbfede56890501dc96e696059c (patch)
treedb60e36510c170109f0fe28003d6959cd4264c72 /arch/arm
parent51895d58c7c0c65afac21570cc14a7189942959a (diff)
parent93cebeb1c21a65b92636aaa278a32fbc0415ec67 (diff)
downloadlinux-9e4b0d55d84a66dbfede56890501dc96e696059c.tar.xz
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Add speed testing on 1420-byte blocks for networking Algorithms: - Improve performance of chacha on ARM for network packets - Improve performance of aegis128 on ARM for network packets Drivers: - Add support for Keem Bay OCS AES/SM4 - Add support for QAT 4xxx devices - Enable crypto-engine retry mechanism in caam - Enable support for crypto engine on sdm845 in qce - Add HiSilicon PRNG driver support" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (161 commits) crypto: qat - add capability detection logic in qat_4xxx crypto: qat - add AES-XTS support for QAT GEN4 devices crypto: qat - add AES-CTR support for QAT GEN4 devices crypto: atmel-i2c - select CONFIG_BITREVERSE crypto: hisilicon/trng - replace atomic_add_return() crypto: keembay - Add support for Keem Bay OCS AES/SM4 dt-bindings: Add Keem Bay OCS AES bindings crypto: aegis128 - avoid spurious references crypto_aegis128_update_simd crypto: seed - remove trailing semicolon in macro definition crypto: x86/poly1305 - Use TEST %reg,%reg instead of CMP $0,%reg crypto: x86/sha512 - Use TEST %reg,%reg instead of CMP $0,%reg crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg crypto: cpt - Fix sparse warnings in cptpf hwrng: ks-sa - Add dependency on IOMEM and OF crypto: lib/blake2s - Move selftest prototype into header file crypto: arm/aes-ce - work around Cortex-A57/A72 silion errata crypto: ecdh - avoid unaligned accesses in ecdh_set_secret() crypto: ccree - rework cache parameters handling crypto: cavium - Use dma_set_mask_and_coherent to simplify code crypto: marvell/octeontx - Use dma_set_mask_and_coherent to simplify code ...
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/crypto/aes-ce-core.S32
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c8
-rw-r--r--arch/arm/crypto/chacha-glue.c34
-rw-r--r--arch/arm/crypto/chacha-neon-core.S97
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c2
-rw-r--r--arch/arm/crypto/sha1.h2
-rw-r--r--arch/arm/crypto/sha1_glue.c2
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c2
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c2
-rw-r--r--arch/arm/crypto/sha256_glue.c2
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c2
-rw-r--r--arch/arm/crypto/sha512-glue.c2
-rw-r--r--arch/arm/crypto/sha512-neon-glue.c2
13 files changed, 143 insertions, 46 deletions
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 4d1707388d94..312428d83eed 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt)
.Lctrloop4x:
subs r4, r4, #4
bmi .Lctr1x
- add r6, r6, #1
+
+ /*
+ * NOTE: the sequence below has been carefully tweaked to avoid
+ * a silicon erratum that exists in Cortex-A57 (#1742098) and
+ * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
+ * may produce an incorrect result if they take their input from a
+ * register of which a single 32-bit lane has been updated the last
+ * time it was modified. To work around this, the lanes of registers
+ * q0-q3 below are not manipulated individually, and the different
+ * counter values are prepared by successive manipulations of q7.
+ */
+ add ip, r6, #1
vmov q0, q7
+ rev ip, ip
+ add lr, r6, #2
+ vmov s31, ip @ set lane 3 of q1 via q7
+ add ip, r6, #3
+ rev lr, lr
vmov q1, q7
- rev ip, r6
- add r6, r6, #1
+ vmov s31, lr @ set lane 3 of q2 via q7
+ rev ip, ip
vmov q2, q7
- vmov s7, ip
- rev ip, r6
- add r6, r6, #1
+ vmov s31, ip @ set lane 3 of q3 via q7
+ add r6, r6, #4
vmov q3, q7
- vmov s11, ip
- rev ip, r6
- add r6, r6, #1
- vmov s15, ip
+
vld1.8 {q4-q5}, [r1]!
vld1.8 {q6}, [r1]!
vld1.8 {q15}, [r1]!
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index bda8bf17631e..f70af1d0514b 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ecb(aes)");
-MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)-all");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
@@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm)
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned int reqsize;
- ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+ ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->enc_tfm))
return PTR_ERR(ctx->enc_tfm);
@@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL |
+ CRYPTO_ALG_NEED_FALLBACK,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 59da6c0b63b6..7b5cf8430c6d 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -23,7 +23,7 @@
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
+ int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
{
u8 buf[CHACHA_BLOCK_SIZE];
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha_4block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 4;
- src += CHACHA_BLOCK_SIZE * 4;
- dst += CHACHA_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA_BLOCK_SIZE) {
- chacha_block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE;
- src += CHACHA_BLOCK_SIZE;
- dst += CHACHA_BLOCK_SIZE;
- state[12]++;
+ while (bytes > CHACHA_BLOCK_SIZE) {
+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
+
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
if (bytes) {
- memcpy(buf, src, bytes);
- chacha_block_xor_neon(state, buf, buf, nrounds);
- memcpy(dst, buf, bytes);
+ const u8 *s = src;
+ u8 *d = dst;
+
+ if (bytes != CHACHA_BLOCK_SIZE)
+ s = d = memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, d, s, nrounds);
+ if (d != dst)
+ memcpy(dst, buf, bytes);
}
}
diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S
index eb22926d4912..13d12f672656 100644
--- a/arch/arm/crypto/chacha-neon-core.S
+++ b/arch/arm/crypto/chacha-neon-core.S
@@ -47,6 +47,7 @@
*/
#include <linux/linkage.h>
+#include <asm/cache.h>
.text
.fpu neon
@@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)
.align 5
ENTRY(chacha_4block_xor_neon)
- push {r4-r5}
+ push {r4, lr}
mov r4, sp // preserve the stack pointer
sub ip, sp, #0x20 // allocate a 32 byte buffer
bic ip, ip, #0x1f // aligned to 32 bytes
@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
vld1.32 {q0-q1}, [r0]
vld1.32 {q2-q3}, [ip]
- adr r5, .Lctrinc
+ adr lr, .Lctrinc
vdup.32 q15, d7[1]
vdup.32 q14, d7[0]
- vld1.32 {q4}, [r5, :128]
+ vld1.32 {q4}, [lr, :128]
vdup.32 q13, d6[1]
vdup.32 q12, d6[0]
vdup.32 q11, d5[1]
@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)
// Re-interleave the words in the first two rows of each block (x0..7).
// Also add the counter values 0-3 to x12[0-3].
- vld1.32 {q8}, [r5, :128] // load counter values 0-3
+ vld1.32 {q8}, [lr, :128] // load counter values 0-3
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)
// Re-interleave the words in the last two rows of each block (x8..15).
vld1.32 {q8-q9}, [sp, :256]
+ mov sp, r4 // restore original stack pointer
+ ldr r4, [r4, #8] // load number of bytes
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
// XOR the rest of the data with the keystream
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #96
veor q0, q0, q8
veor q1, q1, q12
+ ble .Lle96
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #32
veor q0, q0, q2
veor q1, q1, q6
+ ble .Lle128
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #32
veor q0, q0, q10
veor q1, q1, q14
+ ble .Lle160
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #32
veor q0, q0, q4
veor q1, q1, q5
+ ble .Lle192
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #32
veor q0, q0, q9
veor q1, q1, q13
+ ble .Lle224
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
+ subs r4, r4, #32
veor q0, q0, q3
veor q1, q1, q7
+ blt .Llt256
+.Lout:
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]
- mov sp, r4 // restore original stack pointer
veor q0, q0, q11
veor q1, q1, q15
vst1.8 {q0-q1}, [r1]
- pop {r4-r5}
- bx lr
+ pop {r4, pc}
+
+.Lle192:
+ vmov q4, q9
+ vmov q5, q13
+
+.Lle160:
+ // nothing to do
+
+.Lfinalblock:
+ // Process the final block if processing less than 4 full blocks.
+ // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
+ // previous 32 byte output block that still needs to be written at
+ // [r1] in q0-q1.
+ beq .Lfullblock
+
+.Lpartialblock:
+ adr lr, .Lpermute + 32
+ add r2, r2, r4
+ add lr, lr, r4
+ add r4, r4, r1
+
+ vld1.8 {q2-q3}, [lr]
+ vld1.8 {q6-q7}, [r2]
+
+ add r4, r4, #32
+
+ vtbl.8 d4, {q4-q5}, d4
+ vtbl.8 d5, {q4-q5}, d5
+ vtbl.8 d6, {q4-q5}, d6
+ vtbl.8 d7, {q4-q5}, d7
+
+ veor q6, q6, q2
+ veor q7, q7, q3
+
+ vst1.8 {q6-q7}, [r4] // overlapping stores
+ vst1.8 {q0-q1}, [r1]
+ pop {r4, pc}
+
+.Lfullblock:
+ vmov q11, q4
+ vmov q15, q5
+ b .Lout
+.Lle96:
+ vmov q4, q2
+ vmov q5, q6
+ b .Lfinalblock
+.Lle128:
+ vmov q4, q10
+ vmov q5, q14
+ b .Lfinalblock
+.Lle224:
+ vmov q4, q3
+ vmov q5, q7
+ b .Lfinalblock
+.Llt256:
+ vmov q4, q11
+ vmov q5, q15
+ b .Lpartialblock
ENDPROC(chacha_4block_xor_neon)
+
+ .align L1_CACHE_SHIFT
+.Lpermute:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index e79b1fb4b4dc..de9100c67b37 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -7,7 +7,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 758db3e9ff0a..b1b7e21da2c3 100644
--- a/arch/arm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -3,7 +3,7 @@
#define ASM_ARM_CRYPTO_SHA1_H
#include <linux/crypto.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 4e954b3f7ecd..6c2b849e459d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -15,7 +15,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <asm/byteorder.h>
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0071e5e4411a..cfe36ae0f3f5 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -19,7 +19,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <asm/neon.h>
#include <asm/simd.h>
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 87f0b62386c6..c62ce89dd3e0 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -7,7 +7,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index b8a4f79020cf..433ee4ddce6c 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -17,7 +17,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <crypto/sha256_base.h>
#include <asm/simd.h>
#include <asm/neon.h>
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 79820b9e2541..701706262ef3 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -13,7 +13,7 @@
#include <crypto/internal/simd.h>
#include <linux/types.h>
#include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <crypto/sha256_base.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 8775aa42bbbe..0635a65aa488 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -6,7 +6,7 @@
*/
#include <crypto/internal/hash.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
#include <linux/crypto.h>
#include <linux/module.h>
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index 96cb94403540..c879ad32db51 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -7,7 +7,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
#include <linux/crypto.h>
#include <linux/module.h>