From 19d7b2a34f3c69d62f570ac9d0f6bc3cd584b496 Mon Sep 17 00:00:00 2001 From: Nagaraju Date: Thu, 14 Mar 2019 18:16:32 +0530 Subject: [PATCH 09/11] Added MB-64 support to strcmp/strcpy/strlen files --- Review note: the existing 32-bit regular_strcmp loop keeps its `addik r9, r9, 1` in the brid delay slot (it advances the byte index); this patch only adds the __arch64__ variants and the closing #endif. newlib/libc/machine/microblaze/strcmp.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++++++++++ newlib/libc/machine/microblaze/strlen.c | 38 ++++++++++++++++++++ 3 files changed, 155 insertions(+) diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c index 3119d82..dac64da 100644 --- a/newlib/libc/machine/microblaze/strcmp.c +++ b/newlib/libc/machine/microblaze/strcmp.c @@ -133,6 +133,65 @@ strcmp (const char *s1, #include "mb_endian.h" +#ifdef __arch64__ + asm volatile (" \n\ + orl r9, r0, r0 /* Index register */\n\ +check_alignment: \n\ + andli r3, r5, 3 \n\ + andli r4, r6, 3 \n\ + beanei r3, try_align_args \n\ + beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\ +cmp_loop: \n" + LOAD4BYTES("r3", "r5", "r9") + LOAD4BYTES("r4", "r6", "r9") +" \n\ + pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\ + beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\ + cmplu r7, r4, r3 /* ELSE compare whole word */ \n\ + beanei r7, end_cmp \n\ + addlik r9, r9, 4 /* advance index by one word */ \n\ + breaid cmp_loop \n\ + nop /* delay slot */ \n\ +end_cmp_loop: \n\ + lbu r3, r5, r9 /* byte compare loop */ \n\ + lbu r4, r6, r9 \n\ + cmplu r7, r4, r3 /* Compare bytes */ \n\ + beanei r7, end_cmp_early \n\ + addlik r9, r9, 1 /* advance index by one byte */ \n\ + beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\ + nop \n\ +end_cmp_early: \n\ + or r3, r0, r7 /* Return strcmp result */ \n\ + rtsd r15, 8 \n\ + nop \n\ +try_align_args: \n\ + xorl r7, r4, r3 \n\ + beanei r7, regular_strcmp /* cannot align args */ \n\ + rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ +align_loop: \n\ + lbu r3, r5, r9 \n\ + lbu r4, r6, r9 \n\ 
+ cmpu r7, r4, r3 \n\ + beanei r7, end_cmp \n\ + beaeqi r3, end_cmp \n\ + addlik r10, r10, -1 \n\ + addlik r9, r9, 1 \n\ + beaeqid r10, cmp_loop \n\ + nop \n\ + breai align_loop \n\ +regular_strcmp: \n\ + lbu r3, r5, r9 \n\ + lbu r4, r6, r9 \n\ + cmplu r7, r4, r3 \n\ + beanei r7, end_cmp \n\ + beaeqi r3, end_cmp \n\ + breaid regular_strcmp \n\ + addlik r9, r9, 1 \n\ +end_cmp: \n\ + or r3, r0, r7 \n\ + rtsd r15, 8 \n\ + nop /* Return strcmp result */"); +#else asm volatile (" \n\ or r9, r0, r0 /* Index register */\n\ check_alignment: \n\ @@ -181,11 +240,12 @@ regular_strcmp: bnei r7, end_cmp \n\ beqi r3, end_cmp \n\ brid regular_strcmp \n\ addik r9, r9, 1 \n\ end_cmp: \n\ rtsd r15, 8 \n\ or r3, r0, r7 /* Return strcmp result */"); +#endif #endif /* ! HAVE_HW_PCMP */ } diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c index 62072fa..6dbc60d 100644 --- a/newlib/libc/machine/microblaze/strcpy.c +++ b/newlib/libc/machine/microblaze/strcpy.c @@ -125,6 +125,62 @@ strcpy (char *__restrict dst0, #else #include "mb_endian.h" +#ifdef __arch64__ + + asm volatile (" \n\ + orl r9, r0, r0 /* Index register */ \n\ +check_alignment: \n\ + andli r3, r5, 3 \n\ + andli r4, r6, 3 \n\ + beanei r3, try_align_args \n\ + beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\ +cpy_loop: \n" + LOAD4BYTES("r3", "r6", "r9") +" \n\ + pcmplbf r4, r0, r3 \n\ + beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n" + STORE4BYTES("r3", "r5", "r9") +" \n\ + addlik r9, r9, 4 \n\ + breaid cpy_loop \n\ + nop \n\ +cpy_bytes: \n\ + lbu r3, r6, r9 \n\ + sb r3, r5, r9 \n\ + addlik r4, r4, -1 \n\ + addlik r9, r9, 1 /* advance index by one byte */\n\ + beaneid r4, cpy_bytes \n\ + nop \n\ +cpy_null: \n\ + orl r3, r0, r5 /* Return strcpy result */\n\ + rtsd r15, 8 \n\ + nop \n\ +try_align_args: \n\ + xorl r7, r4, r3 \n\ + beanei r7, regular_strcpy /* cannot align args */\n\ + rsublik r10, r3, 4 /* Number of initial bytes to align */\n\ 
+align_loop: \n\ + lbu r3, r6, r9 \n\ + sb r3, r5, r9 \n\ + addlik r10, r10, -1 \n\ + beaeqid r3, end_cpy /* Break if we have seen null character */\n\ + nop \n\ + addlik r9, r9, 1 \n\ + beaneid r10, align_loop \n\ + nop \n\ + breai cpy_loop \n\ +regular_strcpy: \n\ + lbu r3, r6, r9 \n\ + sb r3, r5, r9 \n\ + addlik r9, r9, 1 \n\ + beaneid r3, regular_strcpy \n\ + nop \n\ +end_cpy: \n\ + orl r3, r0, r5 \n\ + rtsd r15, 8 \n\ + nop /* Return strcpy result */"); + +#else asm volatile (" \n\ or r9, r0, r0 /* Index register */ \n\ @@ -171,6 +227,7 @@ regular_strcpy: \n\ end_cpy: \n\ rtsd r15, 8 \n\ or r3, r0, r5 /* Return strcpy result */"); +#endif #endif /* ! HAVE_HW_PCMP */ } diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c index acb4464..c04fa4f 100644 --- a/newlib/libc/machine/microblaze/strlen.c +++ b/newlib/libc/machine/microblaze/strlen.c @@ -116,6 +116,43 @@ strlen (const char *str) #include "mb_endian.h" +#ifdef __arch64__ + asm volatile (" \n\ + orl r9, r0, r0 /* Index register */ \n\ +check_alignment: \n\ + andli r3, r5, 3 \n\ + beanei r3, align_arg \n\ +len_loop: \n" + LOAD4BYTES("r3", "r5", "r9") +" \n\ + pcmplbf r4, r3, r0 \n\ + beanei r4, end_len \n\ + addlik r9, r9, 4 \n\ + breaid len_loop \n\ + nop \n\ +end_len: \n\ + lbu r3, r5, r9 \n\ + beaeqi r3, done_len \n\ + addlik r9, r9, 1 \n\ + breaid end_len \n\ + nop \n\ +done_len: \n\ + orl r3, r0, r9 /* Return len */ \n\ + rtsd r15, 8 \n\ + nop \n\ +align_arg: \n\ + rsublik r10, r3, 4 \n\ +align_loop: \n\ + lbu r3, r5, r9 \n\ + addlik r10, r10, -1 \n\ + beaeqid r3, done_len \n\ + nop \n\ + addlik r9, r9, 1 \n\ + beaneid r10, align_loop \n\ + nop \n\ + breai len_loop"); + +#else asm volatile (" \n\ or r9, r0, r0 /* Index register */ \n\ check_alignment: \n\ @@ -146,5 +183,6 @@ align_loop: \n\ addik r9, r9, 1 \n\ bri len_loop"); +#endif #endif /* ! HAVE_HW_PCMP */ } -- 2.7.4