summary | refs | log | tree | commit | diff
path: root/arch/s390/include/asm/checksum.h
diff options
context:
space:
mode:
author Heiko Carstens <hca@linux.ibm.com> 2020-08-11 17:36:26 +0300
committer Vasily Gorbik <gor@linux.ibm.com> 2020-08-26 19:47:20 +0300
commit 614b4f5d0fa3f622cfcc899491d8a3e6af3d4dc5 (patch)
tree ea7c6e546dfd8d226bce2ba053f521b0fc9101e3 /arch/s390/include/asm/checksum.h
parent bb4644b14accb05663847277002e3efa9fa3cd3b (diff)
download linux-614b4f5d0fa3f622cfcc899491d8a3e6af3d4dc5.tar.xz
s390/checksum: make ip_fast_csum() faster
Convert ip_fast_csum() so that it no longer calls csum_partial(), but instead open-codes the checksum calculation. The problem with csum_partial() is that it makes use of the cksm instruction, which has high startup costs and is therefore only very fast when used on larger memory regions. IPv4 headers, however, are small in size (5-16 32-bit words). The open-coded variant calculates the checksum in ~30% of the time compared to the old variant (z14, march=z196).
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/checksum.h')
-rw-r--r-- arch/s390/include/asm/checksum.h 13
1 files changed, 12 insertions, 1 deletions
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 961c25c5124b..8bc6bed4715b 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -66,7 +66,18 @@ static inline __sum16 csum_fold(__wsum sum)
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- return csum_fold(csum_partial(iph, ihl*4, 0));
+ __u64 csum = 0;
+ __u32 *ptr = (u32 *)iph;
+
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ ihl -= 4;
+ while (ihl--)
+ csum += *ptr++;
+ csum += (csum >> 32) | (csum << 32);
+ return csum_fold((__force __wsum)(csum >> 32));
}
/*