From 0b77d6701cf8d4eb343a83fa8d7eca81a863bb7c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Oct 2017 19:27:05 +0200 Subject: s390: implement memset16, memset32 & memset64 Provide fast versions of the new memset variants. E.g. the generic memset64 is ten times slower than the optimized version if used on a whole page. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/string.h | 22 +++++++++++++++++++++ arch/s390/lib/mem.S | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 8fb43319693d..aa9c3a0f59ff 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -17,6 +17,9 @@ #define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */ #define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */ +#define __HAVE_ARCH_MEMSET16 /* arch function */ +#define __HAVE_ARCH_MEMSET32 /* arch function */ +#define __HAVE_ARCH_MEMSET64 /* arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ #define __HAVE_ARCH_STRCPY /* inline & arch function */ @@ -49,6 +52,25 @@ extern char *strstr(const char *, const char *); #undef __HAVE_ARCH_STRSEP #undef __HAVE_ARCH_STRSPN +void *__memset16(uint16_t *s, uint16_t v, size_t count); +void *__memset32(uint32_t *s, uint32_t v, size_t count); +void *__memset64(uint64_t *s, uint64_t v, size_t count); + +static inline void *memset16(uint16_t *s, uint16_t v, size_t count) +{ + return __memset16(s, v, count * sizeof(v)); +} + +static inline void *memset32(uint32_t *s, uint32_t v, size_t count) +{ + return __memset32(s, v, count * sizeof(v)); +} + +static inline void *memset64(uint64_t *s, uint64_t v, size_t count) +{ + return __memset64(s, v, count * sizeof(v)); +} + #if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY)) static inline void *memchr(const void * s, int c, size_t n) diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 7ff79a4ff00c..f88cf6983849 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -126,3 +126,47 @@ ENTRY(memcpy) .Lmemcpy_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memcpy) + +/* + * __memset16/32/64 + * + * void *__memset16(uint16_t *s, uint16_t v, size_t count) + * void *__memset32(uint32_t *s, uint32_t v, size_t count) + * void *__memset64(uint64_t *s, uint64_t v, size_t count) + */ +.macro __MEMSET bits,bytes,insn +ENTRY(__memset\bits) + ltgr %r4,%r4 + bzr %r14 + cghi %r4,\bytes + je .L__memset_exit\bits + aghi %r4,-(\bytes+1) + srlg %r5,%r4,8 + ltgr %r5,%r5 + lgr %r1,%r2 + jz .L__memset_remainder\bits +.L__memset_loop\bits: + \insn %r3,0(%r1) + mvc \bytes(256-\bytes,%r1),0(%r1) + la %r1,256(%r1) + brctg %r5,.L__memset_loop\bits +.L__memset_remainder\bits: + \insn %r3,0(%r1) + larl %r5,.L__memset_mvc\bits + ex %r4,0(%r5) + br %r14 +.L__memset_exit\bits: + \insn %r3,0(%r2) + br %r14 +.L__memset_mvc\bits: + mvc \bytes(1,%r1),0(%r1) +.endm + +__MEMSET 16,2,sth +EXPORT_SYMBOL(__memset16) + +__MEMSET 32,4,st +EXPORT_SYMBOL(__memset32) + +__MEMSET 64,8,stg +EXPORT_SYMBOL(__memset64) -- cgit v1.2.3