Diffstat (limited to 'meta-arm/meta-arm-bsp/recipes-security/trusted-services/corstone1000/ts-newlib/0001-newlib-memcpy-remove-optimized-version.patch')
-rw-r--r-- | meta-arm/meta-arm-bsp/recipes-security/trusted-services/corstone1000/ts-newlib/0001-newlib-memcpy-remove-optimized-version.patch | 210 |
1 file changed, 210 insertions, 0 deletions
diff --git a/meta-arm/meta-arm-bsp/recipes-security/trusted-services/corstone1000/ts-newlib/0001-newlib-memcpy-remove-optimized-version.patch b/meta-arm/meta-arm-bsp/recipes-security/trusted-services/corstone1000/ts-newlib/0001-newlib-memcpy-remove-optimized-version.patch
new file mode 100644
index 0000000000..7d8504d938
--- /dev/null
+++ b/meta-arm/meta-arm-bsp/recipes-security/trusted-services/corstone1000/ts-newlib/0001-newlib-memcpy-remove-optimized-version.patch
@@ -0,0 +1,210 @@
+From 03d97c104f2d68cffd1bfc48cd62727e13a64712 Mon Sep 17 00:00:00 2001
+From: Rui Miguel Silva <rui.silva@linaro.org>
+Date: Fri, 14 Oct 2022 17:42:52 +0100
+Subject: [PATCH] newlib: memcpy: remove optimized version
+
+When creating packed messages to send over OpenAMP we may need
+to copy to or from unaligned addresses, so we cannot always use
+the assembler-optimized version, which would throw a data abort
+(alignment fault) exception on such accesses.
+
+So, just use the generic C version from libc/string (the same
+approach used in optee-os), which takes care to pick a suitable
+copy strategy based on the given source and destination addresses.
+
+Upstream-Status: Pending
+Signed-off-by: Rui Miguel Silva <rui.silva@linaro.org>
+---
+ newlib/libc/machine/aarch64/memcpy-stub.c |   2 +-
+ newlib/libc/machine/aarch64/memcpy.S      | 166 ----------------------
+ 2 files changed, 1 insertion(+), 167 deletions(-)
+
+diff --git a/newlib/libc/machine/aarch64/memcpy-stub.c b/newlib/libc/machine/aarch64/memcpy-stub.c
+index cd6d72a8b8af..5f2b7968c7fc 100644
+--- a/newlib/libc/machine/aarch64/memcpy-stub.c
++++ b/newlib/libc/machine/aarch64/memcpy-stub.c
+@@ -27,5 +27,5 @@
+ #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
+ # include "../../string/memcpy.c"
+ #else
+-/* See memcpy.S */
++# include "../../string/memcpy.c"
+ #endif
+diff --git a/newlib/libc/machine/aarch64/memcpy.S b/newlib/libc/machine/aarch64/memcpy.S
+index 463bad0a1816..2a1460546374 100644
+--- a/newlib/libc/machine/aarch64/memcpy.S
++++ b/newlib/libc/machine/aarch64/memcpy.S
+@@ -61,170 +61,4 @@
+ #if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
+ /* See memcpy-stub.c */
+ #else
+-
+-#define dstin	x0
+-#define src	x1
+-#define count	x2
+-#define dst	x3
+-#define srcend	x4
+-#define dstend	x5
+-#define A_l	x6
+-#define A_lw	w6
+-#define A_h	x7
+-#define A_hw	w7
+-#define B_l	x8
+-#define B_lw	w8
+-#define B_h	x9
+-#define C_l	x10
+-#define C_h	x11
+-#define D_l	x12
+-#define D_h	x13
+-#define E_l	src
+-#define E_h	count
+-#define F_l	srcend
+-#define F_h	dst
+-#define tmp1	x9
+-
+-#define L(l) .L ## l
+-
+-	.macro def_fn f p2align=0
+-	.text
+-	.p2align \p2align
+-	.global \f
+-	.type \f, %function
+-\f:
+-	.endm
+-
+-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
+-   medium copies of 17..96 bytes which are fully unrolled. Large copies
+-   of more than 96 bytes align the destination and use an unrolled loop
+-   processing 64 bytes per iteration.
+-   Small and medium copies read all data before writing, allowing any
+-   kind of overlap, and memmove tailcalls memcpy for these cases as
+-   well as non-overlapping copies.
+-*/
+-
+-def_fn memcpy p2align=6
+-	prfm	PLDL1KEEP, [src]
+-	add	srcend, src, count
+-	add	dstend, dstin, count
+-	cmp	count, 16
+-	b.ls	L(copy16)
+-	cmp	count, 96
+-	b.hi	L(copy_long)
+-
+-	/* Medium copies: 17..96 bytes.  */
+-	sub	tmp1, count, 1
+-	ldp	A_l, A_h, [src]
+-	tbnz	tmp1, 6, L(copy96)
+-	ldp	D_l, D_h, [srcend, -16]
+-	tbz	tmp1, 5, 1f
+-	ldp	B_l, B_h, [src, 16]
+-	ldp	C_l, C_h, [srcend, -32]
+-	stp	B_l, B_h, [dstin, 16]
+-	stp	C_l, C_h, [dstend, -32]
+-1:
+-	stp	A_l, A_h, [dstin]
+-	stp	D_l, D_h, [dstend, -16]
+-	ret
+-
+-	.p2align 4
+-	/* Small copies: 0..16 bytes.  */
+-L(copy16):
+-	cmp	count, 8
+-	b.lo	1f
+-	ldr	A_l, [src]
+-	ldr	A_h, [srcend, -8]
+-	str	A_l, [dstin]
+-	str	A_h, [dstend, -8]
+-	ret
+-	.p2align 4
+-1:
+-	tbz	count, 2, 1f
+-	ldr	A_lw, [src]
+-	ldr	A_hw, [srcend, -4]
+-	str	A_lw, [dstin]
+-	str	A_hw, [dstend, -4]
+-	ret
+-
+-	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+-	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
+-1:
+-	cbz	count, 2f
+-	lsr	tmp1, count, 1
+-	ldrb	A_lw, [src]
+-	ldrb	A_hw, [srcend, -1]
+-	ldrb	B_lw, [src, tmp1]
+-	strb	A_lw, [dstin]
+-	strb	B_lw, [dstin, tmp1]
+-	strb	A_hw, [dstend, -1]
+-2:	ret
+-
+-	.p2align 4
+-	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
+-	   32 bytes from the end.  */
+-L(copy96):
+-	ldp	B_l, B_h, [src, 16]
+-	ldp	C_l, C_h, [src, 32]
+-	ldp	D_l, D_h, [src, 48]
+-	ldp	E_l, E_h, [srcend, -32]
+-	ldp	F_l, F_h, [srcend, -16]
+-	stp	A_l, A_h, [dstin]
+-	stp	B_l, B_h, [dstin, 16]
+-	stp	C_l, C_h, [dstin, 32]
+-	stp	D_l, D_h, [dstin, 48]
+-	stp	E_l, E_h, [dstend, -32]
+-	stp	F_l, F_h, [dstend, -16]
+-	ret
+-
+-	/* Align DST to 16 byte alignment so that we don't cross cache line
+-	   boundaries on both loads and stores.  There are at least 96 bytes
+-	   to copy, so copy 16 bytes unaligned and then align.  The loop
+-	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+-
+-	.p2align 4
+-L(copy_long):
+-	and	tmp1, dstin, 15
+-	bic	dst, dstin, 15
+-	ldp	D_l, D_h, [src]
+-	sub	src, src, tmp1
+-	add	count, count, tmp1	/* Count is now 16 too large.  */
+-	ldp	A_l, A_h, [src, 16]
+-	stp	D_l, D_h, [dstin]
+-	ldp	B_l, B_h, [src, 32]
+-	ldp	C_l, C_h, [src, 48]
+-	ldp	D_l, D_h, [src, 64]!
+-	subs	count, count, 128 + 16	/* Test and readjust count.  */
+-	b.ls	2f
+-1:
+-	stp	A_l, A_h, [dst, 16]
+-	ldp	A_l, A_h, [src, 16]
+-	stp	B_l, B_h, [dst, 32]
+-	ldp	B_l, B_h, [src, 32]
+-	stp	C_l, C_h, [dst, 48]
+-	ldp	C_l, C_h, [src, 48]
+-	stp	D_l, D_h, [dst, 64]!
+-	ldp	D_l, D_h, [src, 64]!
+-	subs	count, count, 64
+-	b.hi	1b
+-
+-	/* Write the last full set of 64 bytes.  The remainder is at most 64
+-	   bytes, so it is safe to always copy 64 bytes from the end even if
+-	   there is just 1 byte left.  */
+-2:
+-	ldp	E_l, E_h, [srcend, -64]
+-	stp	A_l, A_h, [dst, 16]
+-	ldp	A_l, A_h, [srcend, -48]
+-	stp	B_l, B_h, [dst, 32]
+-	ldp	B_l, B_h, [srcend, -32]
+-	stp	C_l, C_h, [dst, 48]
+-	ldp	C_l, C_h, [srcend, -16]
+-	stp	D_l, D_h, [dst, 64]
+-	stp	E_l, E_h, [dstend, -64]
+-	stp	A_l, A_h, [dstend, -48]
+-	stp	B_l, B_h, [dstend, -32]
+-	stp	C_l, C_h, [dstend, -16]
+-	ret
+-
+-	.size memcpy, . - memcpy
+ #endif
+--
+2.38.0
+
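For context on why the optimized routine is dropped: the AArch64 assembler memcpy copies with LDP/STP of 8- and 16-byte quantities, so copying into an odd offset of a packed message buffer can raise an alignment fault (data abort) when the environment enforces strict alignment, while newlib's generic C memcpy falls back to byte/word accesses. Below is a minimal, illustrative C sketch of the kind of copy the commit message describes; the struct name, fields, and pack_message() are hypothetical and are not the actual trusted-services or OpenAMP types.

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical packed message header, for illustration only. */
struct __attribute__((packed)) msg_header {
    uint8_t  type;    /* 1-byte field shifts everything after it */
    uint32_t length;  /* at offset 1: not 4-byte aligned */
    uint64_t cookie;  /* at offset 5: not 8-byte aligned */
};

/* The payload copy starts at offset 13 (sizeof(struct msg_header) when
 * packed).  A memcpy built on LDP/STP can fault on such a destination if
 * strict alignment is enforced; the generic C memcpy does not. */
static void pack_message(uint8_t *buf, const void *payload, uint32_t len)
{
    struct msg_header hdr = { .type = 1, .length = len, .cookie = 0 };

    memcpy(buf, &hdr, sizeof(hdr));          /* header at offset 0 */
    memcpy(buf + sizeof(hdr), payload, len); /* payload at unaligned offset */
}
```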