summaryrefslogtreecommitdiff
path: root/meta-xilinx/meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Added-MB-64-support-to-strcmp-strcp.patch
blob: 1d4394d3625ae9e459a15a40020c75e80a3ff989 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
From 6587a1cae28468f5a49659a39040f60e425827a7 Mon Sep 17 00:00:00 2001
From: Mahesh Bodapati <mbodapat@xilinx.com>
Date: Tue, 17 Nov 2020 13:06:41 +0530
Subject: [PATCH 09/11] [Patch,MicroBlaze] : Added MB-64 support to
 strcmp/strcpy/strlen files Signed-off-by:Mahesh Bodapati<mbodapat@xilinx.com>

---
 newlib/libc/machine/microblaze/strcmp.c | 63 ++++++++++++++++++++++++-
 newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++
 newlib/libc/machine/microblaze/strlen.c | 38 +++++++++++++++
 3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c
index 3119d82c5..2cfef7388 100644
--- a/newlib/libc/machine/microblaze/strcmp.c
+++ b/newlib/libc/machine/microblaze/strcmp.c
@@ -133,6 +133,66 @@ strcmp (const char *s1,
 
 #include "mb_endian.h"
 
+#ifdef __arch64__
+    asm volatile ("                                          \n\
+        orl      r9, r0, r0               /* Index register */ \n\
+check_alignment:                                             \n\
+        andli    r3, r5, 3                                    \n\
+        andli    r4, r6, 3                                    \n\
+        beanei    r3, try_align_args                           \n\
+        beanei    r4, regular_strcmp     /* At this point we don't have a choice */ \n\
+cmp_loop:                                                                       \n"
+        LOAD4BYTES("r3", "r5", "r9")
+        LOAD4BYTES("r4", "r6", "r9")
+"                                                                                      \n\
+        pcmplbf  r7, r3, r0              /* See if there is Null byte */                         \n\
+        beanei    r7, end_cmp_loop        /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\
+        cmplu    r7, r4, r3              /* ELSE compare whole word */                   \n\
+        beanei    r7, end_cmp                                                             \n\
+        addlik   r9, r9, 4               /* delay slot */                                \n\
+        breaid    cmp_loop                                                                \n\
+        nop		               /* delay slot */                                \n\
+end_cmp_loop:                                                                           \n\
+        lbu     r3, r5, r9              /* byte compare loop */                         \n\
+        lbu     r4, r6, r9                                                              \n\
+        cmplu    r7, r4, r3              /* Compare bytes */                             \n\
+        beanei    r7, end_cmp_early                                                       \n\
+        addlik   r9, r9, 1               /* delay slot */                        \n\
+        beaneid   r3, end_cmp_loop        /* If reached null on one string, terminate */  \n\
+        nop					                              \n\
+end_cmp_early:                                                                  \n\
+        orl   r3, r0, r7               /* delay slot */                        \n\
+        rtsd    r15, 8                                                          \n\
+        nop		                                                         \n\
+try_align_args:                                                                 \n\
+        xorl     r7, r4, r3                                                      \n\
+        beanei    r7, regular_strcmp      /* cannot align args */                 \n\
+        rsublik  r10, r3, 4              /* Number of initial bytes to align */  \n\
+align_loop:                                                                     \n\
+        lbu     r3, r5, r9                                                      \n\
+        lbu     r4, r6, r9                                                      \n\
+        cmplu    r7, r4, r3                                                      \n\
+        beanei    r7, end_cmp                                                     \n\
+        beaeqi    r3, end_cmp                                                     \n\
+        addlik   r10, r10, -1                                                    \n\
+        addlik   r9, r9, 1                                                       \n\
+        beaeqid   r10, cmp_loop                                                   \n\
+        nop		                                                           \n\
+        breai     align_loop                                                      \n\
+regular_strcmp:                                                                 \n\
+        lbu     r3, r5, r9                                                      \n\
+        lbu     r4, r6, r9                                                      \n\
+        cmplu    r7, r4, r3                                                      \n\
+        beanei    r7, end_cmp                                                     \n\
+        beaeqi    r3, end_cmp                                                     \n\
+        addlik   r9, r9, 1                                                       \n\
+        breaid    regular_strcmp                                                  \n\
+        nop									\n\
+end_cmp:                                                                        \n\
+        orl       r3, r0, r7                                                       \n\
+        rtsd    r15, 8                                                          \n\
+        nop              /* Return strcmp result */");
+#else
     asm volatile ("                                          \n\
         or      r9, r0, r0               /* Index register */\n\
 check_alignment:                                             \n\
@@ -181,11 +241,12 @@ regular_strcmp:
         bnei    r7, end_cmp                                                     \n\
         beqi    r3, end_cmp                                                     \n\
         brid    regular_strcmp                                                  \n\
-        addik   r9, r9, 1                                                       \n\
+        addik	r9, r9, 1
 end_cmp:                                                                        \n\
         rtsd    r15, 8                                                          \n\
         or      r3, r0, r7              /* Return strcmp result */");
 
+#endif
 #endif /* ! HAVE_HW_PCMP */
 }
 
diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c
index 62072fa28..6dbc60d77 100644
--- a/newlib/libc/machine/microblaze/strcpy.c
+++ b/newlib/libc/machine/microblaze/strcpy.c
@@ -125,6 +125,62 @@ strcpy (char *__restrict dst0,
 #else    
 
 #include "mb_endian.h"
+#ifdef __arch64__
+
+  asm volatile ("                                                   \n\
+        orl      r9, r0, r0              /* Index register */        \n\
+check_alignment:                                                    \n\
+        andli    r3, r5, 3                                           \n\
+        andli    r4, r6, 3                                           \n\
+        beanei    r3, try_align_args                                  \n\
+        beanei    r4, regular_strcpy      /* At this point we dont have a choice */       \n\
+cpy_loop:                                   \n"
+        LOAD4BYTES("r3", "r6", "r9")
+"                                           \n\
+        pcmplbf  r4, r0, r3                  \n\
+        beanei    r4, cpy_bytes           /* If r4 != 0, then null present within string */\n"
+        STORE4BYTES("r3", "r5", "r9")
+"                                           \n\
+        addlik   r9, r9, 4                   \n\
+        breaid    cpy_loop                    \n\
+	nop					\n\
+cpy_bytes:                                  \n\
+        lbu     r3, r6, r9                  \n\
+        sb      r3, r5, r9                  \n\
+        addlik   r4, r4, -1                  \n\
+        addlik   r9, r9, 1               /* delay slot */\n\
+        beaneid   r4, cpy_bytes               \n\
+	nop					\n\
+cpy_null:                                   \n\
+        orl      r3, r0, r5              /* Return strcpy result */\n\
+        rtsd    r15, 8                      \n\
+	nop					\n\
+try_align_args:                             \n\
+        xorl     r7, r4, r3                  \n\
+        beanei    r7, regular_strcpy      /* cannot align args */\n\
+        rsublik  r10, r3, 4              /* Number of initial bytes to align */\n\
+align_loop:                                 \n\
+        lbu     r3, r6, r9                  \n\
+        sb      r3, r5, r9                  \n\
+        addlik   r10, r10, -1                \n\
+        beaeqid   r3, end_cpy             /* Break if we have seen null character */\n\
+	nop					\n\
+        addlik   r9, r9, 1                   \n\
+        beaneid   r10, align_loop             \n\
+	nop					\n\
+        breai     cpy_loop                    \n\
+regular_strcpy:                             \n\
+        lbu     r3, r6, r9                  \n\
+        sb      r3, r5, r9                  \n\
+        addlik   r9, r9, 1                   \n\
+        beaneid   r3, regular_strcpy          \n\
+	nop					\n\
+end_cpy:                                    \n\
+        orl      r3, r0, r5  			\n\
+        rtsd    r15, 8                      \n\
+	nop			 /* Return strcpy result */");
+
+#else
 
   asm volatile ("                                                   \n\
         or      r9, r0, r0              /* Index register */        \n\
@@ -171,6 +227,7 @@ regular_strcpy:                             \n\
 end_cpy:                                    \n\
         rtsd    r15, 8                      \n\
         or      r3, r0, r5              /* Return strcpy result */");
+#endif
 #endif /* ! HAVE_HW_PCMP */
 }
 
diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c
index acb4464bc..b6f2d3c13 100644
--- a/newlib/libc/machine/microblaze/strlen.c
+++ b/newlib/libc/machine/microblaze/strlen.c
@@ -116,6 +116,43 @@ strlen (const char *str)
 
 #include "mb_endian.h"
 
+#ifdef __arch64__
+  asm volatile ("                                               \n\
+        orl      r9, r0, r0              /* Index register */    \n\
+check_alignment:                                                \n\
+        andli    r3, r5, 3                                       \n\
+        beanei    r3, align_arg                                   \n\
+len_loop:                                                       \n"
+        LOAD4BYTES("r3", "r5", "r9")
+"                                                               \n\
+        pcmplbf  r4, r3, r0                                      \n\
+        beanei    r4, end_len                                     \n\
+        addlik   r9, r9, 4                                       \n\
+        breaid    len_loop                                        \n\
+        nop		                                        \n\
+end_len:                                                        \n\
+        lbu     r3, r5, r9                                      \n\
+        beaeqi    r3, done_len                                    \n\
+        addlik   r9, r9, 1                                       \n\
+        breaid    end_len                                         \n\
+        nop		                                       \n\
+done_len:                                                       \n\
+        orl      r3, r0, r9              /* Return len */        \n\
+        rtsd    r15, 8                                          \n\
+        nop						        \n\
+align_arg:                                                      \n\
+        rsublik  r10, r3, 4                                      \n\
+align_loop:                                                     \n\
+        lbu     r3, r5, r9                                      \n\
+        addlik   r10, r10, -1                                    \n\
+        beaeqid   r3, done_len                                    \n\
+        nop		                                       \n\
+        addlik   r9, r9, 1                                       \n\
+        beaneid   r10, align_loop                                 \n\
+        nop		                                        \n\
+        breai     len_loop");
+
+#else
   asm volatile ("                                               \n\
         or      r9, r0, r0              /* Index register */    \n\
 check_alignment:                                                \n\
@@ -146,5 +183,6 @@ align_loop:                                                     \n\
         addik   r9, r9, 1                                       \n\
         bri     len_loop");
 
+#endif
 #endif  /* ! HAVE_HW_PCMP */
 }
-- 
2.17.1