summaryrefslogtreecommitdiff
path: root/arch/x86/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S15
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S739
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S1
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S16
-rw-r--r--arch/x86/crypto/chacha_glue.c17
-rw-r--r--arch/x86/crypto/crc32-pclmul_asm.S47
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S7
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c6
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S17
9 files changed, 418 insertions, 447 deletions
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index ec437db1fa54..3f0fc7dd87d7 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -63,7 +63,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
#define VMOVDQ vmovdqu
@@ -127,10 +126,6 @@ ddq_add_8:
/* generate a unique variable for ddq_add_x */
-.macro setddq n
- var_ddq_add = ddq_add_\n
-.endm
-
/* generate a unique variable for xmm register */
.macro setxdata n
var_xdata = %xmm\n
@@ -140,9 +135,7 @@ ddq_add_8:
.macro club name, id
.altmacro
- .if \name == DDQ_DATA
- setddq %\id
- .elseif \name == XDATA
+ .if \name == XDATA
setxdata %\id
.endif
.noaltmacro
@@ -165,9 +158,8 @@ ddq_add_8:
.set i, 1
.rept (by - 1)
- club DDQ_DATA, i
club XDATA, i
- vpaddq var_ddq_add(%rip), xcounter, var_xdata
+ vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
@@ -180,8 +172,7 @@ ddq_add_8:
vmovdqa 1*16(p_keys), xkeyA
vpxor xkey0, xdata0, xdata0
- club DDQ_DATA, by
- vpaddq var_ddq_add(%rip), xcounter, xcounter
+ vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 54e7d15dbd0d..1852b19a73a0 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -26,7 +26,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>
@@ -201,7 +200,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
mov \SUBKEY, %r12
movdqu (%r12), \TMP3
movdqa SHUF_MASK(%rip), \TMP2
- PSHUFB_XMM \TMP2, \TMP3
+ pshufb \TMP2, \TMP3
# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
@@ -263,10 +262,10 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm0
+ pshufb %xmm2, %xmm0
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
- PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
movdqu HashKey(%arg2), %xmm13
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
@@ -347,7 +346,7 @@ _zero_cipher_left_\@:
paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
movdqu %xmm0, CurCount(%arg2)
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
+ pshufb %xmm10, %xmm0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
movdqu %xmm0, PBlockEncKey(%arg2)
@@ -377,7 +376,7 @@ _large_enough_update_\@:
# get the appropriate shuffle mask
movdqu (%r12), %xmm2
# shift right 16-r13 bytes
- PSHUFB_XMM %xmm2, %xmm1
+ pshufb %xmm2, %xmm1
_data_read_\@:
lea ALL_F+16(%rip), %r12
@@ -393,12 +392,12 @@ _data_read_\@:
.ifc \operation, dec
pand %xmm1, %xmm2
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10 ,%xmm2
+ pshufb %xmm10 ,%xmm2
pxor %xmm2, %xmm8
.else
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10,%xmm0
+ pshufb %xmm10,%xmm0
pxor %xmm0, %xmm8
.endif
@@ -408,17 +407,17 @@ _data_read_\@:
# GHASH computation for the last <16 byte block
movdqa SHUF_MASK(%rip), %xmm10
# shuffle xmm0 back to output as ciphertext
- PSHUFB_XMM %xmm10, %xmm0
+ pshufb %xmm10, %xmm0
.endif
# Output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
cmp $8, %r13
jle _less_than_8_bytes_left_\@
mov %rax, (%arg3 , %r11, 1)
add $8, %r11
psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
sub $8, %r13
_less_than_8_bytes_left_\@:
mov %al, (%arg3, %r11, 1)
@@ -449,7 +448,7 @@ _partial_done\@:
movd %r12d, %xmm15 # len(A) in %xmm15
mov InLen(%arg2), %r12
shl $3, %r12 # len(C) in bits (*128)
- MOVQ_R64_XMM %r12, %xmm1
+ movq %r12, %xmm1
pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
@@ -457,7 +456,7 @@ _partial_done\@:
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# final GHASH computation
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8
+ pshufb %xmm10, %xmm8
movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
@@ -470,7 +469,7 @@ _return_T_\@:
cmp $8, %r11
jl _T_4_\@
_T_8_\@:
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
mov %rax, (%r10)
add $8, %r10
sub $8, %r11
@@ -518,9 +517,9 @@ _return_T_done_\@:
pshufd $78, \HK, \TMP3
pxor \GH, \TMP2 # TMP2 = a1+a0
pxor \HK, \TMP3 # TMP3 = b1+b0
- PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
- PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
+ pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x00, \HK, \GH # GH = a0*b0
+ pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
pxor \GH, \TMP2
pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
movdqa \TMP2, \TMP3
@@ -570,7 +569,7 @@ _return_T_done_\@:
cmp $8, \DLEN
jl _read_lt8_\@
mov (\DPTR), %rax
- MOVQ_R64_XMM %rax, \XMMDst
+ movq %rax, \XMMDst
sub $8, \DLEN
jz _done_read_partial_block_\@
xor %eax, %eax
@@ -579,7 +578,7 @@ _read_next_byte_\@:
mov 7(\DPTR, \DLEN, 1), %al
dec \DLEN
jnz _read_next_byte_\@
- MOVQ_R64_XMM %rax, \XMM1
+ movq %rax, \XMM1
pslldq $8, \XMM1
por \XMM1, \XMMDst
jmp _done_read_partial_block_\@
@@ -590,7 +589,7 @@ _read_next_byte_lt8_\@:
mov -1(\DPTR, \DLEN, 1), %al
dec \DLEN
jnz _read_next_byte_lt8_\@
- MOVQ_R64_XMM %rax, \XMMDst
+ movq %rax, \XMMDst
_done_read_partial_block_\@:
.endm
@@ -608,7 +607,7 @@ _done_read_partial_block_\@:
jl _get_AAD_rest\@
_get_AAD_blocks\@:
movdqu (%r10), \TMP7
- PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
+ pshufb %xmm14, \TMP7 # byte-reflect the AAD data
pxor \TMP7, \TMP6
GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
add $16, %r10
@@ -624,7 +623,7 @@ _get_AAD_rest\@:
je _get_AAD_done\@
READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
- PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
+ pshufb %xmm14, \TMP7 # byte-reflect the AAD data
pxor \TMP6, \TMP7
GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
movdqu \TMP7, \TMP6
@@ -667,7 +666,7 @@ _data_read_\@: # Finished reading in data
# r16-r13 is the number of bytes in plaintext mod 16)
add %r13, %r12
movdqu (%r12), %xmm2 # get the appropriate shuffle mask
- PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes
+ pshufb %xmm2, %xmm9 # shift right r13 bytes
.ifc \operation, dec
movdqa %xmm1, %xmm3
@@ -689,8 +688,8 @@ _no_extra_mask_1_\@:
pand %xmm1, %xmm3
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm3
- PSHUFB_XMM %xmm2, %xmm3
+ pshufb %xmm10, %xmm3
+ pshufb %xmm2, %xmm3
pxor %xmm3, \AAD_HASH
cmp $0, %r10
@@ -724,8 +723,8 @@ _no_extra_mask_2_\@:
pand %xmm1, %xmm9
movdqa SHUF_MASK(%rip), %xmm1
- PSHUFB_XMM %xmm1, %xmm9
- PSHUFB_XMM %xmm2, %xmm9
+ pshufb %xmm1, %xmm9
+ pshufb %xmm2, %xmm9
pxor %xmm9, \AAD_HASH
cmp $0, %r10
@@ -744,8 +743,8 @@ _encode_done_\@:
movdqa SHUF_MASK(%rip), %xmm10
# shuffle xmm9 back to output as ciphertext
- PSHUFB_XMM %xmm10, %xmm9
- PSHUFB_XMM %xmm2, %xmm9
+ pshufb %xmm10, %xmm9
+ pshufb %xmm2, %xmm9
.endif
# output encrypted Bytes
cmp $0, %r10
@@ -759,14 +758,14 @@ _partial_fill_\@:
mov \PLAIN_CYPH_LEN, %r13
_count_set_\@:
movdqa %xmm9, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
cmp $8, %r13
jle _less_than_8_bytes_left_\@
mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
add $8, \DATA_OFFSET
psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
sub $8, %r13
_less_than_8_bytes_left_\@:
movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
@@ -810,7 +809,7 @@ _partial_block_done_\@:
.else
MOVADQ \XMM0, %xmm\index
.endif
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+ pshufb %xmm14, %xmm\index # perform a 16 byte swap
pxor \TMP2, %xmm\index
.endr
lea 0x10(%arg1),%r10
@@ -821,7 +820,7 @@ _partial_block_done_\@:
aes_loop_initial_\@:
MOVADQ (%r10),\TMP1
.irpc index, \i_seq
- AESENC \TMP1, %xmm\index
+ aesenc \TMP1, %xmm\index
.endr
add $16,%r10
sub $1,%eax
@@ -829,7 +828,7 @@ aes_loop_initial_\@:
MOVADQ (%r10), \TMP1
.irpc index, \i_seq
- AESENCLAST \TMP1, %xmm\index # Last Round
+ aesenclast \TMP1, %xmm\index # Last Round
.endr
.irpc index, \i_seq
movdqu (%arg4 , %r11, 1), \TMP1
@@ -841,7 +840,7 @@ aes_loop_initial_\@:
.ifc \operation, dec
movdqa \TMP1, %xmm\index
.endif
- PSHUFB_XMM %xmm14, %xmm\index
+ pshufb %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation
.endr
@@ -876,19 +875,19 @@ aes_loop_initial_\@:
MOVADQ ONE(%RIP),\TMP1
paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM1
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+ pshufb %xmm14, \XMM1 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM2
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+ pshufb %xmm14, \XMM2 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM3
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+ pshufb %xmm14, \XMM3 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM4
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+ pshufb %xmm14, \XMM4 # perform a 16 byte swap
MOVADQ 0(%arg1),\TMP1
pxor \TMP1, \XMM1
@@ -897,17 +896,17 @@ aes_loop_initial_\@:
pxor \TMP1, \XMM4
.irpc index, 1234 # do 4 rounds
movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
.endr
.irpc index, 56789 # do next 5 rounds
movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
.endr
lea 0xa0(%arg1),%r10
mov keysize,%eax
@@ -918,7 +917,7 @@ aes_loop_initial_\@:
aes_loop_pre_\@:
MOVADQ (%r10),\TMP2
.irpc index, 1234
- AESENC \TMP2, %xmm\index
+ aesenc \TMP2, %xmm\index
.endr
add $16,%r10
sub $1,%eax
@@ -926,10 +925,10 @@ aes_loop_pre_\@:
aes_loop_pre_done\@:
MOVADQ (%r10), \TMP2
- AESENCLAST \TMP2, \XMM1
- AESENCLAST \TMP2, \XMM2
- AESENCLAST \TMP2, \XMM3
- AESENCLAST \TMP2, \XMM4
+ aesenclast \TMP2, \XMM1
+ aesenclast \TMP2, \XMM2
+ aesenclast \TMP2, \XMM3
+ aesenclast \TMP2, \XMM4
movdqu 16*0(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM1
.ifc \operation, dec
@@ -961,12 +960,12 @@ aes_loop_pre_done\@:
.endif
add $64, %r11
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+ pshufb %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+ pshufb %xmm14, \XMM2 # perform a 16 byte swap
+ pshufb %xmm14, \XMM3 # perform a 16 byte swap
+ pshufb %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\@:
@@ -978,7 +977,7 @@ _initial_blocks_done\@:
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
@@ -994,7 +993,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
movdqu HashKey_4(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2
@@ -1002,51 +1001,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
+ pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
movdqu HashKey_4_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1 # Round 1
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 2
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1 # Round 2
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
movdqu HashKey_3(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 3
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 3
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 4
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 4
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
movdqu HashKey_3_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 5
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 5
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5
@@ -1058,25 +1057,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
# Multiply TMP5 * HashKey using karatsuba
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 6
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 6
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 7
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 7
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
movdqu HashKey_2_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 8
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 8
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5
@@ -1089,13 +1088,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
movdqu HashKey(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 9
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 9
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
lea 0xa0(%arg1),%r10
mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8
@@ -1105,7 +1104,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
aes_loop_par_enc\@:
MOVADQ (%r10),\TMP3
.irpc index, 1234
- AESENC \TMP3, %xmm\index
+ aesenc \TMP3, %xmm\index
.endr
add $16,%r10
sub $1,%eax
@@ -1113,12 +1112,12 @@ aes_loop_par_enc\@:
aes_loop_par_enc_done\@:
MOVADQ (%r10), \TMP3
- AESENCLAST \TMP3, \XMM1 # Round 10
- AESENCLAST \TMP3, \XMM2
- AESENCLAST \TMP3, \XMM3
- AESENCLAST \TMP3, \XMM4
+ aesenclast \TMP3, \XMM1 # Round 10
+ aesenclast \TMP3, \XMM2
+ aesenclast \TMP3, \XMM3
+ aesenclast \TMP3, \XMM4
movdqu HashKey_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu 16(%arg4,%r11,1), \TMP3
@@ -1131,10 +1130,10 @@ aes_loop_par_enc_done\@:
movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer
movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer
movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1
pxor \XMM8, \XMM5
@@ -1186,7 +1185,7 @@ aes_loop_par_enc_done\@:
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
@@ -1202,7 +1201,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
movdqu HashKey_4(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2
@@ -1210,51 +1209,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
+ pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
movdqu HashKey_4_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1 # Round 1
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 2
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
+ aesenc \TMP1, \XMM1 # Round 2
+ aesenc \TMP1, \XMM2
+ aesenc \TMP1, \XMM3
+ aesenc \TMP1, \XMM4
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
movdqu HashKey_3(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 3
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 3
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 4
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 4
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
movdqu HashKey_3_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 5
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 5
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5
@@ -1266,25 +1265,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
# Multiply TMP5 * HashKey using karatsuba
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 6
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 6
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 7
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 7
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
movdqu HashKey_2_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 8
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
+ aesenc \TMP3, \XMM1 # Round 8
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5
@@ -1297,13 +1296,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
movdqu HashKey(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 9
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+ aesenc \TMP3, \XMM1 # Round 9
+ aesenc \TMP3, \XMM2
+ aesenc \TMP3, \XMM3
+ aesenc \TMP3, \XMM4
+ pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
lea 0xa0(%arg1),%r10
mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8
@@ -1313,7 +1312,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
aes_loop_par_dec\@:
MOVADQ (%r10),\TMP3
.irpc index, 1234
- AESENC \TMP3, %xmm\index
+ aesenc \TMP3, %xmm\index
.endr
add $16,%r10
sub $1,%eax
@@ -1321,12 +1320,12 @@ aes_loop_par_dec\@:
aes_loop_par_dec_done\@:
MOVADQ (%r10), \TMP3
- AESENCLAST \TMP3, \XMM1 # last round
- AESENCLAST \TMP3, \XMM2
- AESENCLAST \TMP3, \XMM3
- AESENCLAST \TMP3, \XMM4
+ aesenclast \TMP3, \XMM1 # last round
+ aesenclast \TMP3, \XMM2
+ aesenclast \TMP3, \XMM3
+ aesenclast \TMP3, \XMM4
movdqu HashKey_k(%arg2), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer
@@ -1343,10 +1342,10 @@ aes_loop_par_dec_done\@:
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+ pshufb %xmm15, \XMM1 # perform a 16 byte swap
+ pshufb %xmm15, \XMM2 # perform a 16 byte swap
+ pshufb %xmm15, \XMM3 # perform a 16 byte swap
+ pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1
pxor \XMM8, \XMM5
@@ -1402,10 +1401,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2
movdqu HashKey_4(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
+ pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1
+ pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0
movdqu HashKey_4_k(%arg2), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
@@ -1415,10 +1414,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2
movdqu HashKey_3(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0
movdqu HashKey_3_k(%arg2), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst
pxor \TMP2, \XMM1
@@ -1430,10 +1429,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2
movdqu HashKey_2(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0
movdqu HashKey_2_k(%arg2), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst
pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
@@ -1443,10 +1442,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2
movdqu HashKey(%arg2), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
+ pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0
movdqu HashKey_k(%arg2), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst
pxor \XMM1, \TMP2
@@ -1504,13 +1503,13 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
_esb_loop_\@:
MOVADQ (%r10),\TMP1
- AESENC \TMP1,\XMM0
+ aesenc \TMP1,\XMM0
add $16,%r10
sub $1,%eax
jnz _esb_loop_\@
MOVADQ (%r10),\TMP1
- AESENCLAST \TMP1,\XMM0
+ aesenclast \TMP1,\XMM0
.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
@@ -1849,72 +1848,72 @@ SYM_FUNC_START(aesni_set_key)
movups 0x10(UKEYP), %xmm2 # other user key
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
call _key_expansion_256a
- AESKEYGENASSIST 0x1 %xmm0 %xmm1
+ aeskeygenassist $0x1, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
call _key_expansion_256a
- AESKEYGENASSIST 0x2 %xmm0 %xmm1
+ aeskeygenassist $0x2, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
call _key_expansion_256a
- AESKEYGENASSIST 0x4 %xmm0 %xmm1
+ aeskeygenassist $0x4, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
call _key_expansion_256a
- AESKEYGENASSIST 0x8 %xmm0 %xmm1
+ aeskeygenassist $0x8, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
call _key_expansion_256a
- AESKEYGENASSIST 0x10 %xmm0 %xmm1
+ aeskeygenassist $0x10, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
call _key_expansion_256a
- AESKEYGENASSIST 0x20 %xmm0 %xmm1
+ aeskeygenassist $0x20, %xmm0, %xmm1
call _key_expansion_256b
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
call _key_expansion_256a
jmp .Ldec_key
.Lenc_key192:
movq 0x10(UKEYP), %xmm2 # other user key
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
call _key_expansion_192a
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
call _key_expansion_192b
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
call _key_expansion_192a
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
call _key_expansion_192b
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
call _key_expansion_192a
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
call _key_expansion_192b
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
call _key_expansion_192a
- AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
+ aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
call _key_expansion_192b
jmp .Ldec_key
.Lenc_key128:
- AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
+ aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
call _key_expansion_128
- AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
+ aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
call _key_expansion_128
- AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
+ aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
call _key_expansion_128
- AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
+ aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
call _key_expansion_128
- AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
+ aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
call _key_expansion_128
- AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
+ aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
call _key_expansion_128
- AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
+ aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
call _key_expansion_128
- AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
+ aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
call _key_expansion_128
- AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
+ aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
call _key_expansion_128
- AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
+ aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
call _key_expansion_128
.Ldec_key:
sub $0x10, TKEYP
@@ -1927,7 +1926,7 @@ SYM_FUNC_START(aesni_set_key)
.align 4
.Ldec_key_loop:
movaps (KEYP), %xmm0
- AESIMC %xmm0 %xmm1
+ aesimc %xmm0, %xmm1
movaps %xmm1, (UKEYP)
add $0x10, KEYP
sub $0x10, UKEYP
@@ -1988,37 +1987,37 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
je .Lenc192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps -0x50(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
.align 4
.Lenc192:
movaps -0x40(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps -0x30(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
.align 4
.Lenc128:
movaps -0x20(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps -0x10(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps (TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x10(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x20(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x30(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x40(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x50(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x60(TKEYP), KEY
- AESENC KEY STATE
+ aesenc KEY, STATE
movaps 0x70(TKEYP), KEY
- AESENCLAST KEY STATE
+ aesenclast KEY, STATE
ret
SYM_FUNC_END(_aesni_enc1)
@@ -2054,79 +2053,79 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
je .L4enc192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps -0x50(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
#.align 4
.L4enc192:
movaps -0x40(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps -0x30(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
#.align 4
.L4enc128:
movaps -0x20(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps -0x10(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps (TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x10(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x20(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x30(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x40(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x50(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x60(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
movaps 0x70(TKEYP), KEY
- AESENCLAST KEY STATE1 # last round
- AESENCLAST KEY STATE2
- AESENCLAST KEY STATE3
- AESENCLAST KEY STATE4
+ aesenclast KEY, STATE1 # last round
+ aesenclast KEY, STATE2
+ aesenclast KEY, STATE3
+ aesenclast KEY, STATE4
ret
SYM_FUNC_END(_aesni_enc4)
@@ -2178,37 +2177,37 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
je .Ldec192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps -0x50(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
.align 4
.Ldec192:
movaps -0x40(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps -0x30(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
.align 4
.Ldec128:
movaps -0x20(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps -0x10(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps (TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x10(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x20(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x30(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x40(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x50(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x60(TKEYP), KEY
- AESDEC KEY STATE
+ aesdec KEY, STATE
movaps 0x70(TKEYP), KEY
- AESDECLAST KEY STATE
+ aesdeclast KEY, STATE
ret
SYM_FUNC_END(_aesni_dec1)
@@ -2244,79 +2243,79 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
je .L4dec192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps -0x50(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
.align 4
.L4dec192:
movaps -0x40(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps -0x30(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
.align 4
.L4dec128:
movaps -0x20(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps -0x10(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps (TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x10(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x20(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x30(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x40(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x50(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x60(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
movaps 0x70(TKEYP), KEY
- AESDECLAST KEY STATE1 # last round
- AESDECLAST KEY STATE2
- AESDECLAST KEY STATE3
- AESDECLAST KEY STATE4
+ aesdeclast KEY, STATE1 # last round
+ aesdeclast KEY, STATE2
+ aesdeclast KEY, STATE3
+ aesdeclast KEY, STATE4
ret
SYM_FUNC_END(_aesni_dec4)
@@ -2599,10 +2598,10 @@ SYM_FUNC_END(aesni_cbc_dec)
SYM_FUNC_START_LOCAL(_aesni_inc_init)
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
- PSHUFB_XMM BSWAP_MASK CTR
+ pshufb BSWAP_MASK, CTR
mov $1, TCTR_LOW
- MOVQ_R64_XMM TCTR_LOW INC
- MOVQ_R64_XMM CTR TCTR_LOW
+ movq TCTR_LOW, INC
+ movq CTR, TCTR_LOW
ret
SYM_FUNC_END(_aesni_inc_init)
@@ -2630,7 +2629,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
psrldq $8, INC
.Linc_low:
movaps CTR, IV
- PSHUFB_XMM BSWAP_MASK IV
+ pshufb BSWAP_MASK, IV
ret
SYM_FUNC_END(_aesni_inc)
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 0cea33295287..5fee47956f3b 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -120,7 +120,6 @@
##
#include <linux/linkage.h>
-#include <asm/inst.h>
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.POLY, "aM", @progbits, 16
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index a38ab2512a6f..ca1788bfee16 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -120,10 +120,10 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
FRAME_BEGIN
# x0..3 = s0..3
- movdqa 0x00(%rdi),%xmm0
- movdqa 0x10(%rdi),%xmm1
- movdqa 0x20(%rdi),%xmm2
- movdqa 0x30(%rdi),%xmm3
+ movdqu 0x00(%rdi),%xmm0
+ movdqu 0x10(%rdi),%xmm1
+ movdqu 0x20(%rdi),%xmm2
+ movdqu 0x30(%rdi),%xmm3
movdqa %xmm0,%xmm8
movdqa %xmm1,%xmm9
movdqa %xmm2,%xmm10
@@ -205,10 +205,10 @@ SYM_FUNC_START(hchacha_block_ssse3)
# %edx: nrounds
FRAME_BEGIN
- movdqa 0x00(%rdi),%xmm0
- movdqa 0x10(%rdi),%xmm1
- movdqa 0x20(%rdi),%xmm2
- movdqa 0x30(%rdi),%xmm3
+ movdqu 0x00(%rdi),%xmm0
+ movdqu 0x10(%rdi),%xmm1
+ movdqu 0x20(%rdi),%xmm2
+ movdqu 0x30(%rdi),%xmm3
mov %edx,%r8d
call chacha_permute
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 22250091cdbe..e67a59130025 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -14,8 +14,6 @@
#include <linux/module.h>
#include <asm/simd.h>
-#define CHACHA_STATE_ALIGN 16
-
asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
@@ -124,8 +122,6 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
hchacha_block_generic(state, stream, nrounds);
} else {
@@ -138,8 +134,6 @@ EXPORT_SYMBOL(hchacha_block_arch);
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
@@ -147,8 +141,6 @@ EXPORT_SYMBOL(chacha_init_arch);
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
int nrounds)
{
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
@@ -170,15 +162,12 @@ EXPORT_SYMBOL(chacha_crypt_arch);
static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
- u32 *state, state_buf[16 + 2] __aligned(8);
+ u32 state[CHACHA_STATE_WORDS] __aligned(8);
struct skcipher_walk walk;
int err;
err = skcipher_walk_virt(&walk, req, false);
- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
-
chacha_init_generic(state, ctx->key, iv);
while (walk.nbytes > 0) {
@@ -217,12 +206,10 @@ static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 *state, state_buf[16 + 2] __aligned(8);
+ u32 state[CHACHA_STATE_WORDS] __aligned(8);
struct chacha_ctx subctx;
u8 real_iv[16];
- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
chacha_init_generic(state, ctx->key, req->iv);
if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 9fd28ff65bc2..6e7d4c4d3208 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -38,7 +38,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
.section .rodata
@@ -129,17 +128,17 @@ loop_64:/* 64 bytes Full cache line folding */
#ifdef __x86_64__
movdqa %xmm4, %xmm8
#endif
- PCLMULQDQ 00, CONSTANT, %xmm1
- PCLMULQDQ 00, CONSTANT, %xmm2
- PCLMULQDQ 00, CONSTANT, %xmm3
+ pclmulqdq $0x00, CONSTANT, %xmm1
+ pclmulqdq $0x00, CONSTANT, %xmm2
+ pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
- PCLMULQDQ 00, CONSTANT, %xmm4
+ pclmulqdq $0x00, CONSTANT, %xmm4
#endif
- PCLMULQDQ 0x11, CONSTANT, %xmm5
- PCLMULQDQ 0x11, CONSTANT, %xmm6
- PCLMULQDQ 0x11, CONSTANT, %xmm7
+ pclmulqdq $0x11, CONSTANT, %xmm5
+ pclmulqdq $0x11, CONSTANT, %xmm6
+ pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
- PCLMULQDQ 0x11, CONSTANT, %xmm8
+ pclmulqdq $0x11, CONSTANT, %xmm8
#endif
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
@@ -149,8 +148,8 @@ loop_64:/* 64 bytes Full cache line folding */
#else
/* xmm8 unsupported for x32 */
movdqa %xmm4, %xmm5
- PCLMULQDQ 00, CONSTANT, %xmm4
- PCLMULQDQ 0x11, CONSTANT, %xmm5
+ pclmulqdq $0x00, CONSTANT, %xmm4
+ pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4
#endif
@@ -172,20 +171,20 @@ less_64:/* Folding cache line into 128bit */
prefetchnta (BUF)
movdqa %xmm1, %xmm5
- PCLMULQDQ 0x00, CONSTANT, %xmm1
- PCLMULQDQ 0x11, CONSTANT, %xmm5
+ pclmulqdq $0x00, CONSTANT, %xmm1
+ pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5
- PCLMULQDQ 0x00, CONSTANT, %xmm1
- PCLMULQDQ 0x11, CONSTANT, %xmm5
+ pclmulqdq $0x00, CONSTANT, %xmm1
+ pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5
- PCLMULQDQ 0x00, CONSTANT, %xmm1
- PCLMULQDQ 0x11, CONSTANT, %xmm5
+ pclmulqdq $0x00, CONSTANT, %xmm1
+ pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm4, %xmm1
@@ -193,8 +192,8 @@ less_64:/* Folding cache line into 128bit */
jb fold_64
loop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5
- PCLMULQDQ 0x00, CONSTANT, %xmm1
- PCLMULQDQ 0x11, CONSTANT, %xmm5
+ pclmulqdq $0x00, CONSTANT, %xmm1
+ pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor (BUF), %xmm1
sub $0x10, LEN
@@ -205,7 +204,7 @@ loop_16:/* Folding rest buffer into 128bit */
fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */
- PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+ pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1
@@ -220,7 +219,7 @@ fold_64:
#endif
psrldq $0x04, %xmm2
pand %xmm3, %xmm1
- PCLMULQDQ 0x00, CONSTANT, %xmm1
+ pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
@@ -231,11 +230,11 @@ fold_64:
#endif
movdqa %xmm1, %xmm2
pand %xmm3, %xmm1
- PCLMULQDQ 0x10, CONSTANT, %xmm1
+ pclmulqdq $0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1
- PCLMULQDQ 0x00, CONSTANT, %xmm1
+ pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
- PEXTRD 0x01, %xmm1, %eax
+ pextrd $0x01, %xmm1, %eax
ret
SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 8501ec4532f4..884dc767b051 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -43,7 +43,6 @@
* SOFTWARE.
*/
-#include <asm/inst.h>
#include <linux/linkage.h>
#include <asm/nospec-branch.h>
@@ -170,7 +169,7 @@ continue_block:
## branch into array
lea jump_table(%rip), %bufp
- movzxw (%bufp, %rax, 2), len
+ movzwq (%bufp, %rax, 2), len
lea crc_array(%rip), %bufp
lea (%bufp, len, 1), %bufp
JMP_NOSPEC bufp
@@ -225,10 +224,10 @@ LABEL crc_ %i
subq %rax, tmp # tmp -= rax*24
movq crc_init, %xmm1 # CRC for block 1
- PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
+ pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
- PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1
+ pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 8a17621f7d3a..8acbb6584a37 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -948,10 +948,8 @@ static void store_felem(u64 *b, u64 *f)
{
u64 f30 = f[3U];
u64 top_bit0 = f30 >> (u32)63U;
- u64 carry0;
u64 f31;
u64 top_bit;
- u64 carry;
u64 f0;
u64 f1;
u64 f2;
@@ -970,11 +968,11 @@ static void store_felem(u64 *b, u64 *f)
u64 o2;
u64 o3;
f[3U] = f30 & (u64)0x7fffffffffffffffU;
- carry0 = add_scalar(f, f, (u64)19U * top_bit0);
+ add_scalar(f, f, (u64)19U * top_bit0);
f31 = f[3U];
top_bit = f31 >> (u32)63U;
f[3U] = f31 & (u64)0x7fffffffffffffffU;
- carry = add_scalar(f, f, (u64)19U * top_bit);
+ add_scalar(f, f, (u64)19U * top_bit);
f0 = f[0U];
f1 = f[1U];
f2 = f[2U];
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index bb9735fbb865..99ac25e18e09 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -14,7 +14,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
#include <asm/frame.h>
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
@@ -51,9 +50,9 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
pxor DATA, T2
pxor SHASH, T3
- PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
- PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
- PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
+ pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
+ pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
+ pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
@@ -95,9 +94,9 @@ SYM_FUNC_START(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP
- PSHUFB_XMM BSWAP DATA
+ pshufb BSWAP, DATA
call __clmul_gf128mul_ble
- PSHUFB_XMM BSWAP DATA
+ pshufb BSWAP, DATA
movups DATA, (%rdi)
FRAME_END
ret
@@ -114,18 +113,18 @@ SYM_FUNC_START(clmul_ghash_update)
movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
- PSHUFB_XMM BSWAP DATA
+ pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
movups (%rsi), IN1
- PSHUFB_XMM BSWAP IN1
+ pshufb BSWAP, IN1
pxor IN1, DATA
call __clmul_gf128mul_ble
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lupdate_loop
- PSHUFB_XMM BSWAP DATA
+ pshufb BSWAP, DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
FRAME_END