summaryrefslogtreecommitdiff
path: root/arch/loongarch/lib/copy_user.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/loongarch/lib/copy_user.S')
-rw-r--r--arch/loongarch/lib/copy_user.S251
1 files changed, 201 insertions, 50 deletions
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 55ac6020a1ad..b21f6d5d38f5 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
- addi.d a0, a2, (\to) * (-8)
+ sub.d a0, a2, a0
+ addi.d a0, a0, (\to) * (-8)
+ jr ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+ addi.d a0, a2, -\to
jr ra
.endr
@@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 1b, .L_fixup_handle_s0
+ _asm_extable 2b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_generic)
/*
@@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic)
* a2: n
*/
SYM_FUNC_START(__copy_user_fast)
- beqz a2, 19f
+ sltui t0, a2, 9
+ bnez t0, .Lsmall
- ori a3, zero, 64
- blt a2, a3, 17f
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+0: ld.d t0, a1, 0
+1: st.d t0, a0, 0
- /* copy 64 bytes at a time */
-1: ld.d t0, a1, 0
-2: ld.d t1, a1, 8
-3: ld.d t2, a1, 16
-4: ld.d t3, a1, 24
-5: ld.d t4, a1, 32
-6: ld.d t5, a1, 40
-7: ld.d t6, a1, 48
-8: ld.d t7, a1, 56
-9: st.d t0, a0, 0
-10: st.d t1, a0, 8
-11: st.d t2, a0, 16
-12: st.d t3, a0, 24
-13: st.d t4, a0, 32
-14: st.d t5, a0, 40
-15: st.d t6, a0, 48
-16: st.d t7, a0, 56
+ /* align up destination address */
+ andi t1, a0, 7
+ sub.d t0, zero, t1
+ addi.d t0, t0, 8
+ add.d a1, a1, t0
+ add.d a0, a0, t0
- addi.d a0, a0, 64
- addi.d a1, a1, 64
- addi.d a2, a2, -64
- bge a2, a3, 1b
+ addi.d a4, a3, -64
+ bgeu a1, a4, .Llt64
- beqz a2, 19f
+ /* copy 64 bytes at a time */
+.Lloop64:
+2: ld.d t0, a1, 0
+3: ld.d t1, a1, 8
+4: ld.d t2, a1, 16
+5: ld.d t3, a1, 24
+6: ld.d t4, a1, 32
+7: ld.d t5, a1, 40
+8: ld.d t6, a1, 48
+9: ld.d t7, a1, 56
+ addi.d a1, a1, 64
+10: st.d t0, a0, 0
+11: st.d t1, a0, 8
+12: st.d t2, a0, 16
+13: st.d t3, a0, 24
+14: st.d t4, a0, 32
+15: st.d t5, a0, 40
+16: st.d t6, a0, 48
+17: st.d t7, a0, 56
+ addi.d a0, a0, 64
+ bltu a1, a4, .Lloop64
/* copy the remaining bytes */
-17: ld.b t0, a1, 0
-18: st.b t0, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, 1
- addi.d a2, a2, -1
- bgt a2, zero, 17b
+.Llt64:
+ addi.d a4, a3, -32
+ bgeu a1, a4, .Llt32
+18: ld.d t0, a1, 0
+19: ld.d t1, a1, 8
+20: ld.d t2, a1, 16
+21: ld.d t3, a1, 24
+ addi.d a1, a1, 32
+22: st.d t0, a0, 0
+23: st.d t1, a0, 8
+24: st.d t2, a0, 16
+25: st.d t3, a0, 24
+ addi.d a0, a0, 32
+
+.Llt32:
+ addi.d a4, a3, -16
+ bgeu a1, a4, .Llt16
+26: ld.d t0, a1, 0
+27: ld.d t1, a1, 8
+ addi.d a1, a1, 16
+28: st.d t0, a0, 0
+29: st.d t1, a0, 8
+ addi.d a0, a0, 16
+
+.Llt16:
+ addi.d a4, a3, -8
+ bgeu a1, a4, .Llt8
+30: ld.d t0, a1, 0
+31: st.d t0, a0, 0
+
+.Llt8:
+32: ld.d t0, a3, -8
+33: st.d t0, a2, -8
/* return */
-19: move a0, a2
+ move a0, zero
+ jr ra
+
+ .align 5
+.Lsmall:
+ pcaddi t0, 8
+ slli.d a3, a2, 5
+ add.d t0, t0, a3
+ jr t0
+
+ .align 5
+ move a0, zero
+ jr ra
+
+ .align 5
+34: ld.b t0, a1, 0
+35: st.b t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+36: ld.h t0, a1, 0
+37: st.h t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+38: ld.h t0, a1, 0
+39: ld.b t1, a1, 2
+40: st.h t0, a0, 0
+41: st.b t1, a0, 2
+ move a0, zero
+ jr ra
+
+ .align 5
+42: ld.w t0, a1, 0
+43: st.w t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+44: ld.w t0, a1, 0
+45: ld.b t1, a1, 4
+46: st.w t0, a0, 0
+47: st.b t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+48: ld.w t0, a1, 0
+49: ld.h t1, a1, 4
+50: st.w t0, a0, 0
+51: st.h t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+52: ld.w t0, a1, 0
+53: ld.w t1, a1, 3
+54: st.w t0, a0, 0
+55: st.w t1, a0, 3
+ move a0, zero
+ jr ra
+
+ .align 5
+56: ld.d t0, a1, 0
+57: st.d t0, a0, 0
+ move a0, zero
jr ra
/* fixup and ex_table */
+ _asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_1
- _asm_extable 3b, .L_fixup_handle_2
- _asm_extable 4b, .L_fixup_handle_3
- _asm_extable 5b, .L_fixup_handle_4
- _asm_extable 6b, .L_fixup_handle_5
- _asm_extable 7b, .L_fixup_handle_6
- _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 3b, .L_fixup_handle_0
+ _asm_extable 4b, .L_fixup_handle_0
+ _asm_extable 5b, .L_fixup_handle_0
+ _asm_extable 6b, .L_fixup_handle_0
+ _asm_extable 7b, .L_fixup_handle_0
+ _asm_extable 8b, .L_fixup_handle_0
_asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_1
- _asm_extable 11b, .L_fixup_handle_2
- _asm_extable 12b, .L_fixup_handle_3
- _asm_extable 13b, .L_fixup_handle_4
- _asm_extable 14b, .L_fixup_handle_5
- _asm_extable 15b, .L_fixup_handle_6
- _asm_extable 16b, .L_fixup_handle_7
- _asm_extable 17b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_0
+ _asm_extable 11b, .L_fixup_handle_1
+ _asm_extable 12b, .L_fixup_handle_2
+ _asm_extable 13b, .L_fixup_handle_3
+ _asm_extable 14b, .L_fixup_handle_4
+ _asm_extable 15b, .L_fixup_handle_5
+ _asm_extable 16b, .L_fixup_handle_6
+ _asm_extable 17b, .L_fixup_handle_7
_asm_extable 18b, .L_fixup_handle_0
+ _asm_extable 19b, .L_fixup_handle_0
+ _asm_extable 20b, .L_fixup_handle_0
+ _asm_extable 21b, .L_fixup_handle_0
+ _asm_extable 22b, .L_fixup_handle_0
+ _asm_extable 23b, .L_fixup_handle_1
+ _asm_extable 24b, .L_fixup_handle_2
+ _asm_extable 25b, .L_fixup_handle_3
+ _asm_extable 26b, .L_fixup_handle_0
+ _asm_extable 27b, .L_fixup_handle_0
+ _asm_extable 28b, .L_fixup_handle_0
+ _asm_extable 29b, .L_fixup_handle_1
+ _asm_extable 30b, .L_fixup_handle_0
+ _asm_extable 31b, .L_fixup_handle_0
+ _asm_extable 32b, .L_fixup_handle_0
+ _asm_extable 33b, .L_fixup_handle_1
+ _asm_extable 34b, .L_fixup_handle_s0
+ _asm_extable 35b, .L_fixup_handle_s0
+ _asm_extable 36b, .L_fixup_handle_s0
+ _asm_extable 37b, .L_fixup_handle_s0
+ _asm_extable 38b, .L_fixup_handle_s0
+ _asm_extable 39b, .L_fixup_handle_s0
+ _asm_extable 40b, .L_fixup_handle_s0
+ _asm_extable 41b, .L_fixup_handle_s2
+ _asm_extable 42b, .L_fixup_handle_s0
+ _asm_extable 43b, .L_fixup_handle_s0
+ _asm_extable 44b, .L_fixup_handle_s0
+ _asm_extable 45b, .L_fixup_handle_s0
+ _asm_extable 46b, .L_fixup_handle_s0
+ _asm_extable 47b, .L_fixup_handle_s4
+ _asm_extable 48b, .L_fixup_handle_s0
+ _asm_extable 49b, .L_fixup_handle_s0
+ _asm_extable 50b, .L_fixup_handle_s0
+ _asm_extable 51b, .L_fixup_handle_s4
+ _asm_extable 52b, .L_fixup_handle_s0
+ _asm_extable 53b, .L_fixup_handle_s0
+ _asm_extable 54b, .L_fixup_handle_s0
+ _asm_extable 55b, .L_fixup_handle_s4
+ _asm_extable 56b, .L_fixup_handle_s0
+ _asm_extable 57b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_fast)