summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S67
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c2
-rw-r--r--crypto/chacha20_generic.c9
-rw-r--r--crypto/testmgr.h7
-rw-r--r--drivers/crypto/inside-secure/safexcel_hash.c8
-rw-r--r--drivers/crypto/ixp4xx_crypto.c6
-rw-r--r--lib/mpi/mpicoder.c4
7 files changed, 61 insertions, 42 deletions
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792db15ef..1eab79c9ac48 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
.set T1, REG_T1
.endm
-#define K_BASE %r8
#define HASH_PTR %r9
+#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13
-#define BUFFER_END %r11
#define PRECALC_BUF %r14
#define WK_BUF %r15
@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds
*/
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP
.elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7)
@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
vpsrld $30, WY, WY
vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@
.endm
+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
/*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/
@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF
# Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0
.rept 160
PRECALC i
.set i, i + 1
.endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF
.align 32
@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE
*/
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
.align 32
jmp _end
.align 32
@@ -512,10 +524,10 @@ _loop0:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/*
* rounds
* 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E
- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop
mov TB, B
@@ -575,10 +587,10 @@ _loop2:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
jmp _loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:
avx2_zeroupper
- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END
- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f960a043cdeb..fc61739150e7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
static bool avx2_usable(void)
{
- if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+ if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
&& boot_cpu_has(X86_FEATURE_BMI1)
&& boot_cpu_has(X86_FEATURE_BMI2))
return true;
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index 8b3c04d625c3..4a45fa4890c0 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -91,9 +91,14 @@ int crypto_chacha20_crypt(struct skcipher_request *req)
crypto_chacha20_init(state, ctx, walk.iv);
while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- err = skcipher_walk_done(&walk, 0);
+ nbytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6ceb0e2758bb..d54971d2d1c8 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -32675,6 +32675,10 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = {
"\x5b\x86\x2f\x37\x30\xe3\x7c\xfd"
"\xc4\xfd\x80\x6c\x22\xf2\x21",
.rlen = 375,
+ .also_non_np = 1,
+ .np = 3,
+ .tap = { 375 - 20, 4, 16 },
+
}, { /* RFC7539 A.2. Test Vector #3 */
.key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
"\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
@@ -33049,6 +33053,9 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = {
"\xa1\xed\xad\xd5\x76\xfa\x24\x8f"
"\x98",
.rlen = 1281,
+ .also_non_np = 1,
+ .np = 3,
+ .tap = { 1200, 1, 80 },
},
};
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 8527a5899a2f..3f819399cd95 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -883,10 +883,7 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
if (ret)
return ret;
- memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
- memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
-
- for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
ctx->base.needs_inv = true;
@@ -894,6 +891,9 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
}
}
+ memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
+ memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+
return 0;
}
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 427cbe012729..dadc4a808df5 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -1073,7 +1073,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
&crypt->icv_rev_aes);
if (unlikely(!req_ctx->hmac_virt))
- goto free_buf_src;
+ goto free_buf_dst;
if (!encrypt) {
scatterwalk_map_and_copy(req_ctx->hmac_virt,
req->src, cryptlen, authsize, 0);
@@ -1088,10 +1088,10 @@ static int aead_perform(struct aead_request *req, int encrypt,
BUG_ON(qmgr_stat_overflow(SEND_QID));
return -EINPROGRESS;
-free_buf_src:
- free_buf_chain(dev, req_ctx->src, crypt->src_buf);
free_buf_dst:
free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
+free_buf_src:
+ free_buf_chain(dev, req_ctx->src, crypt->src_buf);
crypt->ctl_flags = CTL_FLAG_UNUSED;
return -ENOMEM;
}
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 5a0f75a3bf01..eead4b339466 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -364,11 +364,11 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
}
miter.consumed = lzeros;
- sg_miter_stop(&miter);
nbytes -= lzeros;
nbits = nbytes * 8;
if (nbits > MAX_EXTERN_MPI_BITS) {
+ sg_miter_stop(&miter);
pr_info("MPI: mpi too large (%u bits)\n", nbits);
return NULL;
}
@@ -376,6 +376,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
if (nbytes > 0)
nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);
+ sg_miter_stop(&miter);
+
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
if (!val)