summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 06:59:19 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 06:59:19 +0300
commit67c642e0d9aa927c1340638e472f2467fefd1dbf (patch)
treef8fe1374f35df395b18066a7e4305e7445f8296f /arch
parentfdd8f6585cef1c8c0fac745c1baa687301d55a90 (diff)
parent64d83f06774668081258bd7f3241267239bb9ab2 (diff)
downloadlinux-67c642e0d9aa927c1340638e472f2467fefd1dbf.tar.xz
Merge tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux
Pull arch/csky updates from Guo Ren: - Three atomic optimizations - memcpy/memcpy_io optimization - Some coding conventions for Kbuild, removing warnings * tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux: csky: Move $(core-y) into arch/csky/Kbuild csky: Remove unused core-y for dts csky: Remove unused $(dtb-y) from boot/Makefile csky: atomic: Add conditional atomic operations' optimization csky: atomic: Add custom atomic.h implementation csky: atomic: Optimize cmpxchg with acquire & release csky: optimize memcpy_{from,to}io() and memset_io() csky: Add C based string functions csky: Fix versioncheck warnings csky: patch_text: Fixup last cpu should be master csky: fix typos in comments
Diffstat (limited to 'arch')
-rw-r--r--arch/csky/Kbuild2
-rw-r--r--arch/csky/Kconfig8
-rw-r--r--arch/csky/Makefile3
-rw-r--r--arch/csky/abiv1/Makefile2
-rw-r--r--arch/csky/abiv1/memcpy.S347
-rw-r--r--arch/csky/abiv1/strksyms.c6
-rw-r--r--arch/csky/abiv2/Makefile2
-rw-r--r--arch/csky/abiv2/strksyms.c4
-rw-r--r--arch/csky/boot/Makefile1
-rw-r--r--arch/csky/include/asm/atomic.h237
-rw-r--r--arch/csky/include/asm/barrier.h11
-rw-r--r--arch/csky/include/asm/cmpxchg.h64
-rw-r--r--arch/csky/include/asm/io.h12
-rw-r--r--arch/csky/kernel/Makefile2
-rw-r--r--arch/csky/kernel/io.c91
-rw-r--r--arch/csky/kernel/module.c2
-rw-r--r--arch/csky/kernel/probes/kprobes.c2
-rw-r--r--arch/csky/kernel/probes/uprobes.c2
-rw-r--r--arch/csky/kernel/process.c1
-rw-r--r--arch/csky/lib/Makefile3
-rw-r--r--arch/csky/lib/string.c134
-rw-r--r--arch/csky/mm/dma-mapping.c1
22 files changed, 562 insertions, 375 deletions
diff --git a/arch/csky/Kbuild b/arch/csky/Kbuild
index 4e39f7abdeb6..0621eaea4196 100644
--- a/arch/csky/Kbuild
+++ b/arch/csky/Kbuild
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-y += kernel/ mm/
+
# for cleaning
subdir- += boot
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 75ef86605d69..21d72b078eef 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -320,6 +320,14 @@ config HOTPLUG_CPU
controlled through /sys/devices/system/cpu/cpu1/hotplug/target.
Say N if you want to disable CPU hotplug.
+
+config HAVE_EFFICIENT_UNALIGNED_STRING_OPS
+ bool "Enable EFFICIENT_UNALIGNED_STRING_OPS for abiv2"
+ depends on CPU_CK807 || CPU_CK810 || CPU_CK860
+ help
+ Say Y here to enable EFFICIENT_UNALIGNED_STRING_OPS. Some CPU models could
+ deal with unaligned access by hardware.
+
endmenu
source "arch/csky/Kconfig.platforms"
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 866805077636..4e1d619fd5c6 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -61,15 +61,12 @@ KBUILD_AFLAGS += $(KBUILD_CFLAGS)
head-y := arch/csky/kernel/head.o
-core-y += arch/csky/kernel/
-core-y += arch/csky/mm/
core-y += arch/csky/$(CSKYABI)/
libs-y += arch/csky/lib/ \
$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
boot := arch/csky/boot
-core-y += $(boot)/dts/
all: zImage
diff --git a/arch/csky/abiv1/Makefile b/arch/csky/abiv1/Makefile
index 601ce3b2fb85..a4b2ade0fc67 100644
--- a/arch/csky/abiv1/Makefile
+++ b/arch/csky/abiv1/Makefile
@@ -4,5 +4,3 @@ obj-y += bswapdi.o
obj-y += bswapsi.o
obj-y += cacheflush.o
obj-y += mmap.o
-obj-y += memcpy.o
-obj-y += strksyms.o
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
deleted file mode 100644
index 5078eb5169fa..000000000000
--- a/arch/csky/abiv1/memcpy.S
+++ /dev/null
@@ -1,347 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/linkage.h>
-
-.macro GET_FRONT_BITS rx y
-#ifdef __cskyLE__
- lsri \rx, \y
-#else
- lsli \rx, \y
-#endif
-.endm
-
-.macro GET_AFTER_BITS rx y
-#ifdef __cskyLE__
- lsli \rx, \y
-#else
- lsri \rx, \y
-#endif
-.endm
-
-/* void *memcpy(void *dest, const void *src, size_t n); */
-ENTRY(memcpy)
- mov r7, r2
- cmplti r4, 4
- bt .L_copy_by_byte
- mov r6, r2
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_not_aligned
- mov r6, r3
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_aligned_but_src_not_aligned
-.L0:
- cmplti r4, 16
- jbt .L_aligned_and_len_less_16bytes
- subi sp, 8
- stw r8, (sp, 0)
-.L_aligned_and_len_larger_16bytes:
- ldw r1, (r3, 0)
- ldw r5, (r3, 4)
- ldw r8, (r3, 8)
- stw r1, (r7, 0)
- ldw r1, (r3, 12)
- stw r5, (r7, 4)
- stw r8, (r7, 8)
- stw r1, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L_aligned_and_len_larger_16bytes
- ldw r8, (sp, 0)
- addi sp, 8
- cmpnei r4, 0
- jbf .L_return
-
-.L_aligned_and_len_less_16bytes:
- cmplti r4, 4
- bt .L_copy_by_byte
-.L1:
- ldw r1, (r3, 0)
- stw r1, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- jbf .L1
- br .L_copy_by_byte
-
-.L_return:
- rts
-
-.L_copy_by_byte: /* len less than 4 bytes */
- cmpnei r4, 0
- jbf .L_return
-.L4:
- ldb r1, (r3, 0)
- stb r1, (r7, 0)
- addi r3, 1
- addi r7, 1
- decne r4
- jbt .L4
- rts
-
-/*
- * If dest is not aligned, just copying some bytes makes the dest align.
- * Afther that, we judge whether the src is aligned.
- */
-.L_dest_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- mov r5, r6
-.L5:
- ldb r1, (r3, 0) /* makes the dest align. */
- stb r1, (r7, 0)
- addi r5, 1
- subi r4, 1
- addi r3, 1
- addi r7, 1
- cmpnei r5, 4
- jbt .L5
- cmplti r4, 4
- jbt .L_copy_by_byte
- mov r6, r3 /* judge whether the src is aligned. */
- andi r6, 3
- cmpnei r6, 0
- jbf .L0
-
-/* Judge the number of misaligned, 1, 2, 3? */
-.L_dest_aligned_but_src_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- bclri r3, 0
- bclri r3, 1
- ldw r1, (r3, 0)
- addi r3, 4
- cmpnei r6, 2
- bf .L_dest_aligned_but_src_not_aligned_2bytes
- cmpnei r6, 3
- bf .L_dest_aligned_but_src_not_aligned_3bytes
-
-.L_dest_aligned_but_src_not_aligned_1byte:
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- cmplti r4, 16
- bf .L11
-.L10: /* If the len is less than 16 bytes */
- GET_FRONT_BITS r1 8
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 24
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-.L11:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L12:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 8 /* little or big endian? */
- mov r10, r5
- GET_AFTER_BITS r5 24
- or r5, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r11
- GET_AFTER_BITS r11 24
- or r11, r10
-
- GET_FRONT_BITS r1 8
- mov r10, r8
- GET_AFTER_BITS r8 24
- or r8, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r9
- GET_AFTER_BITS r9 24
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L12
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp , 16
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-
-.L_dest_aligned_but_src_not_aligned_2bytes:
- cmplti r4, 16
- bf .L21
-.L20:
- GET_FRONT_BITS r1 16
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 16
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
- rts
-
-.L21: /* n > 16 */
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-
-.L22:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 16
- mov r10, r5
- GET_AFTER_BITS r5 16
- or r5, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r11
- GET_AFTER_BITS r11 16
- or r11, r10
-
- GET_FRONT_BITS r1 16
- mov r10, r8
- GET_AFTER_BITS r8 16
- or r8, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r9
- GET_AFTER_BITS r9 16
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L22
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
-
-
-.L_dest_aligned_but_src_not_aligned_3bytes:
- cmplti r4, 16
- bf .L31
-.L30:
- GET_FRONT_BITS r1 24
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 8
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
-.L31:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L32:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 24
- mov r10, r5
- GET_AFTER_BITS r5 8
- or r5, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r11
- GET_AFTER_BITS r11 8
- or r11, r10
-
- GET_FRONT_BITS r1 24
- mov r10, r8
- GET_AFTER_BITS r8 8
- or r8, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r9
- GET_AFTER_BITS r9 8
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L32
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c
deleted file mode 100644
index c7ccbb27e8d7..000000000000
--- a/arch/csky/abiv1/strksyms.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(memcpy);
diff --git a/arch/csky/abiv2/Makefile b/arch/csky/abiv2/Makefile
index c561efa5533c..ea8005fe01a8 100644
--- a/arch/csky/abiv2/Makefile
+++ b/arch/csky/abiv2/Makefile
@@ -2,9 +2,11 @@
obj-y += cacheflush.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
obj-y += memcmp.o
+ifeq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
obj-y += memcpy.o
obj-y += memmove.o
obj-y += memset.o
+endif
obj-y += strcmp.o
obj-y += strcpy.o
obj-y += strlen.o
diff --git a/arch/csky/abiv2/strksyms.c b/arch/csky/abiv2/strksyms.c
index 06da723d8202..8d1fd28c6cf9 100644
--- a/arch/csky/abiv2/strksyms.c
+++ b/arch/csky/abiv2/strksyms.c
@@ -3,10 +3,12 @@
#include <linux/module.h>
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memmove);
+#endif
+EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strlen);
diff --git a/arch/csky/boot/Makefile b/arch/csky/boot/Makefile
index dbc9b1bd72f0..c3cfde28f8e6 100644
--- a/arch/csky/boot/Makefile
+++ b/arch/csky/boot/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
targets := Image zImage uImage
-targets += $(dtb-y)
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
new file mode 100644
index 000000000000..60406ef9c2bb
--- /dev/null
+++ b/arch/csky/include/asm/atomic.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_ATOMIC_H
+#define __ASM_CSKY_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#include <asm-generic/atomic64.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define __atomic_acquire_fence() __bar_brarw()
+
+#define __atomic_release_fence() __bar_brwaw()
+
+/* Return the value of v->counter, loaded through READ_ONCE(). */
+static __always_inline int arch_atomic_read(const atomic_t *v)
+{
+ return READ_ONCE(v->counter);
+}
+/* Store @i into v->counter through WRITE_ONCE(). */
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
+{
+ WRITE_ONCE(v->counter, i);
+}
+
+/*
+ * ATOMIC_OP(op) - define arch_atomic_<op>(i, v), which returns nothing.
+ *
+ * The asm loads v->counter with ldex.w, applies "<op> %0, %1" with @i as
+ * the second operand, writes the result back with stex.w and branches
+ * back to label 1 while the register handed to stex.w reads back as zero
+ * (presumably the "exclusive store failed" status - confirm against the
+ * C-SKY ISA manual). No fence instruction is part of the sequence.
+ */
+#define ATOMIC_OP(op) \
+static __always_inline \
+void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%2) \n" \
+ " " #op " %0, %1 \n" \
+ " stex.w %0, (%2) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp) \
+ : "r" (i), "r" (&v->counter) \
+ : "memory"); \
+}
+
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
+ATOMIC_OP(and)
+ATOMIC_OP( or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
+/*
+ * ATOMIC_FETCH_OP(op) - define arch_atomic_fetch_<op>_relaxed(i, v).
+ *
+ * ldex.w/stex.w retry loop: the loaded value is copied into @ret before
+ * "<op>" is applied to the working register, so the function returns the
+ * value v->counter held before the update. The sequence contains no
+ * fence instruction, matching the _relaxed suffix.
+ */
+#define ATOMIC_FETCH_OP(op) \
+static __always_inline \
+int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (i), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+}
+
+/*
+ * ATOMIC_OP_RETURN(op, c_op) - define arch_atomic_<op>_return_relaxed(i, v).
+ *
+ * Built on arch_atomic_fetch_<op>_relaxed(): the new value is recomputed
+ * in C by applying @c_op to the fetched old value and @i.
+ */
+#define ATOMIC_OP_RETURN(op, c_op) \
+static __always_inline \
+int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op##_relaxed(i, v) c_op i; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_FETCH_OP(op) \
+ ATOMIC_OP_RETURN(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+
+#define ATOMIC_OPS(op) \
+ ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS( or)
+ATOMIC_OPS(xor)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+
+#undef ATOMIC_FETCH_OP
+
+/*
+ * Add @a to v->counter unless the counter currently equals @u.
+ *
+ * Returns the value loaded by ldex.w (the old counter value) on both
+ * paths. RELEASE_FENCE is executed once before the retry loop. FULL_FENCE
+ * sits between the store loop and label 2, so it is only reached after
+ * the stex.w loop has finished; the early exit taken by "bf 2f"
+ * (presumably: cmpne found the value equal to @u - confirm against the
+ * C-SKY ISA manual) jumps past it.
+ */
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+ int prev, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%3) \n"
+ " cmpne %0, %4 \n"
+ " bf 2f \n"
+ " mov %1, %0 \n"
+ " add %1, %2 \n"
+ " stex.w %1, (%3) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (a), "r" (&v->counter), "r" (u)
+ : "memory");
+
+ return prev;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+/*
+ * Increment v->counter unless its current value is negative.
+ *
+ * Returns true when the increment was stored, false when the counter was
+ * negative and has been left untouched.
+ *
+ * The result is carried in @rc: the asm clears it before the sign test
+ * ("blz %0, 2f") and sets it to 1 only on the path that goes on to the
+ * stex.w store. @tmp must NOT be used as the result: on the early exit it
+ * still holds the (negative, hence non-zero) loaded value, and on the
+ * store path the loop only terminates once it is non-zero, so testing
+ * @tmp would report success unconditionally.
+ *
+ * RELEASE_FENCE runs before the loop; FULL_FENCE is only reached on the
+ * store path because label 2 is placed after it.
+ */
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+	int rc, tmp;
+
+	__asm__ __volatile__ (
+		RELEASE_FENCE
+		"1: ldex.w %0, (%2) \n"
+		" movi %1, 0 \n"
+		" blz %0, 2f \n"
+		" movi %1, 1 \n"
+		" addi %0, 1 \n"
+		" stex.w %0, (%2) \n"
+		" bez %0, 1b \n"
+		FULL_FENCE
+		"2:\n"
+		: "=&r" (tmp), "=&r" (rc)
+		: "r" (&v->counter)
+		: "memory");
+
+	return rc != 0;
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+/*
+ * Decrement v->counter unless its current value is positive.
+ *
+ * Returns true when the decrement was stored, false when the counter was
+ * greater than zero and has been left untouched.
+ *
+ * The result is carried in @rc: the asm clears it before the sign test
+ * ("bhz %0, 2f") and sets it to 1 only on the path that goes on to the
+ * stex.w store. @tmp must NOT be used as the result: on the early exit it
+ * still holds the (positive, hence non-zero) loaded value, and on the
+ * store path the loop only terminates once it is non-zero, so testing
+ * @tmp would report success unconditionally.
+ *
+ * RELEASE_FENCE runs before the loop; FULL_FENCE is only reached on the
+ * store path because label 2 is placed after it.
+ */
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+	int rc, tmp;
+
+	__asm__ __volatile__ (
+		RELEASE_FENCE
+		"1: ldex.w %0, (%2) \n"
+		" movi %1, 0 \n"
+		" bhz %0, 2f \n"
+		" movi %1, 1 \n"
+		" subi %0, 1 \n"
+		" stex.w %0, (%2) \n"
+		" bez %0, 1b \n"
+		FULL_FENCE
+		"2:\n"
+		: "=&r" (tmp), "=&r" (rc)
+		: "r" (&v->counter)
+		: "memory");
+
+	return rc != 0;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+/*
+ * Decrement v->counter only if the result would not be negative.
+ *
+ * @dec receives the value loaded by ldex.w and @tmp that value minus one.
+ * When @tmp is negative "blz %1, 2f" skips the store (and FULL_FENCE);
+ * otherwise @tmp is written back with stex.w in a retry loop.
+ *
+ * Returns the old value minus one on both paths, so a negative return
+ * value means nothing was stored.
+ */
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+ int dec, tmp;
+
+ __asm__ __volatile__ (
+ RELEASE_FENCE
+ "1: ldex.w %0, (%2) \n"
+ " subi %1, %0, 1 \n"
+ " blz %1, 2f \n"
+ " stex.w %1, (%2) \n"
+ " bez %1, 1b \n"
+ FULL_FENCE
+ "2:\n"
+ : "=&r" (dec), "=&r" (tmp)
+ : "r" (&v->counter)
+ : "memory");
+
+ return dec - 1;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
+/*
+ * ATOMIC_OP() - define the atomic_t exchange helpers as thin wrappers
+ * around the size-dispatching primitives, always passing size 4 and the
+ * address of v->counter:
+ *   arch_atomic_xchg_relaxed()    -> __xchg_relaxed()
+ *   arch_atomic_cmpxchg_relaxed() -> __cmpxchg_relaxed()
+ *   arch_atomic_cmpxchg_acquire() -> __cmpxchg_acquire()
+ *   arch_atomic_cmpxchg()         -> __cmpxchg()
+ * Note the argument order: __xchg_relaxed() takes the new value first,
+ * the __cmpxchg*() helpers take the pointer first.
+ */
+#define ATOMIC_OP() \
+static __always_inline \
+int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
+{ \
+ return __xchg_relaxed(n, &(v->counter), 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_relaxed(&(v->counter), o, n, 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_acquire(&(v->counter), o, n, 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg(&(v->counter), o, n, 4); \
+}
+
+#define ATOMIC_OPS() \
+ ATOMIC_OP()
+
+ATOMIC_OPS()
+
+#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP
+
+#else
+#include <asm-generic/atomic.h>
+#endif
+
+#endif /* __ASM_CSKY_ATOMIC_H */
diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h
index f4045dd53e17..15de58b10aec 100644
--- a/arch/csky/include/asm/barrier.h
+++ b/arch/csky/include/asm/barrier.h
@@ -37,17 +37,21 @@
* bar.brar
* bar.bwaw
*/
+#define FULL_FENCE ".long 0x842fc000\n"
+#define ACQUIRE_FENCE ".long 0x8427c000\n"
+#define RELEASE_FENCE ".long 0x842ec000\n"
+
#define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory")
#define __bar_br() asm volatile (".long 0x8424c000\n":::"memory")
#define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory")
#define __bar_arw() asm volatile (".long 0x8423c000\n":::"memory")
#define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory")
#define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw() asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw() asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw() asm volatile (ACQUIRE_FENCE:::"memory")
#define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory")
#define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw() asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw() asm volatile (RELEASE_FENCE:::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
#define __smp_rmb() __bar_brar()
#define __smp_wmb() __bar_bwaw()
-#define ACQUIRE_FENCE ".long 0x8427c000\n"
#define __smp_acquire_fence() __bar_brarw()
#define __smp_release_fence() __bar_brwaw()
diff --git a/arch/csky/include/asm/cmpxchg.h b/arch/csky/include/asm/cmpxchg.h
index d1bef11f8dc9..5b8faccd65e4 100644
--- a/arch/csky/include/asm/cmpxchg.h
+++ b/arch/csky/include/asm/cmpxchg.h
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
#define arch_cmpxchg_relaxed(ptr, o, n) \
(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
-#define arch_cmpxchg(ptr, o, n) \
+/*
+ * __cmpxchg_acquire() - compare-and-exchange with acquire ordering.
+ *
+ * Only size 4 is implemented; any other size calls __bad_xchg(). The asm
+ * loads *ptr with ldex.w, leaves through label 2 when the value differs
+ * from @old, otherwise stores @new with stex.w in a retry loop and then
+ * executes ACQUIRE_FENCE. The fence is skipped on the compare-failed
+ * path. Evaluates to the value loaded from *ptr.
+ *
+ * The "memory" clobber is required: the asm writes *ptr without listing
+ * it as an output, and without the clobber the compiler is free to cache
+ * memory values across the statement or to move later accesses above it,
+ * which would defeat the acquire ordering.
+ */
+#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(new) __new = (new); \
+ __typeof__(new) __tmp; \
+ __typeof__(old) __old = (old); \
+ __typeof__(*(ptr)) __ret; \
+ switch (size) { \
+ case 4: \
+ asm volatile ( \
+ "1: ldex.w %0, (%3) \n" \
+ " cmpne %0, %4 \n" \
+ " bt 2f \n" \
+ " mov %1, %2 \n" \
+ " stex.w %1, (%3) \n" \
+ " bez %1, 1b \n" \
+ ACQUIRE_FENCE \
+ "2: \n" \
+ : "=&r" (__ret), "=&r" (__tmp) \
+ : "r" (__new), "r"(__ptr), "r"(__old) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_xchg(); \
+ } \
+ __ret; \
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+ (__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+/*
+ * __cmpxchg() - fully ordered compare-and-exchange.
+ *
+ * Only size 4 is implemented; any other size calls __bad_xchg(). The asm
+ * executes RELEASE_FENCE, loads *ptr with ldex.w, leaves through label 2
+ * when the value differs from @old, otherwise stores @new with stex.w in
+ * a retry loop and then executes FULL_FENCE. The trailing fence is
+ * skipped on the compare-failed path. Evaluates to the value loaded from
+ * *ptr.
+ *
+ * The "memory" clobber is required: the fence helpers this sequence
+ * replaces acted as compiler barriers, and the asm writes *ptr without
+ * listing it as an output. Without the clobber the compiler may cache
+ * memory values across the statement or reorder accesses around it.
+ */
+#define __cmpxchg(ptr, old, new, size) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(new) __new = (new); \
+ __typeof__(new) __tmp; \
+ __typeof__(old) __old = (old); \
__typeof__(*(ptr)) __ret; \
- __smp_release_fence(); \
- __ret = arch_cmpxchg_relaxed(ptr, o, n); \
- __smp_acquire_fence(); \
+ switch (size) { \
+ case 4: \
+ asm volatile ( \
+ RELEASE_FENCE \
+ "1: ldex.w %0, (%3) \n" \
+ " cmpne %0, %4 \n" \
+ " bt 2f \n" \
+ " mov %1, %2 \n" \
+ " stex.w %1, (%3) \n" \
+ " bez %1, 1b \n" \
+ FULL_FENCE \
+ "2: \n" \
+ : "=&r" (__ret), "=&r" (__tmp) \
+ : "r" (__new), "r"(__ptr), "r"(__old) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_xchg(); \
+ } \
__ret; \
})
+#define arch_cmpxchg(ptr, o, n) \
+ (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n) \
+ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
#else
#include <asm-generic/cmpxchg.h>
#endif
diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h
index f82654053dc0..4725bb977b0f 100644
--- a/arch/csky/include/asm/io.h
+++ b/arch/csky/include/asm/io.h
@@ -5,7 +5,6 @@
#include <linux/pgtable.h>
#include <linux/types.h>
-#include <linux/version.h>
/*
* I/O memory access primitives. Reads are ordered relative to any
@@ -33,6 +32,17 @@
#endif
/*
+ * String version of I/O memory access operations.
+ */
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c,v,l) __memset_io((c),(v),(l))
+#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
+#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
+
+/*
* I/O memory mapping functions.
*/
#define ioremap_wc(addr, size) \
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 6c0f36010ed0..4eb41421ca5b 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -2,7 +2,7 @@
extra-y := head.o vmlinux.lds
obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
-obj-y += power.o syscall.o syscall_table.o setup.o
+obj-y += power.o syscall.o syscall_table.o setup.o io.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/
diff --git a/arch/csky/kernel/io.c b/arch/csky/kernel/io.c
new file mode 100644
index 000000000000..5883f13fa2b1
--- /dev/null
+++ b/arch/csky/kernel/io.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * __memcpy_fromio - copy @count bytes from I/O memory @from to RAM @to.
+ *
+ * Three phases: single-byte reads until @from is 4-byte aligned, 32-bit
+ * reads while at least four bytes remain, then single-byte reads for the
+ * tail. Only the I/O side is aligned; the 32-bit stores to @to use
+ * whatever alignment is left over.
+ * NOTE(review): confirm that unaligned 32-bit stores to RAM are
+ * acceptable on every supported CPU model.
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+	u8 *dst = to;
+
+	/* Head: byte accesses up to the first aligned I/O address. */
+	for (; count && !IS_ALIGNED((unsigned long)from, 4); count--) {
+		*dst = __raw_readb(from);
+		dst++;
+		from++;
+	}
+
+	/* Body: one 32-bit I/O read per iteration. */
+	for (; count >= 4; count -= 4) {
+		*(u32 *)dst = __raw_readl(from);
+		dst += 4;
+		from += 4;
+	}
+
+	/* Tail: at most three bytes are left. */
+	for (; count; count--) {
+		*dst = __raw_readb(from);
+		dst++;
+		from++;
+	}
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/*
+ * __memcpy_toio - copy @count bytes from RAM @from to I/O memory @to.
+ *
+ * Three phases: single-byte writes until @to is 4-byte aligned, 32-bit
+ * writes while at least four bytes remain, then single-byte writes for
+ * the tail. Only the I/O side is aligned; the 32-bit loads from @from use
+ * whatever alignment is left over.
+ * NOTE(review): confirm that unaligned 32-bit loads from RAM are
+ * acceptable on every supported CPU model.
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+	const u8 *src = from;
+
+	/* Head: byte accesses up to the first aligned I/O address. */
+	for (; count && !IS_ALIGNED((unsigned long)to, 4); count--) {
+		__raw_writeb(*src, to);
+		src++;
+		to++;
+	}
+
+	/* Body: one 32-bit I/O write per iteration. */
+	for (; count >= 4; count -= 4) {
+		__raw_writel(*(const u32 *)src, to);
+		src += 4;
+		to += 4;
+	}
+
+	/* Tail: at most three bytes are left. */
+	for (; count; count--) {
+		__raw_writeb(*src, to);
+		src++;
+		to++;
+	}
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/*
+ * __memset_io - fill @count bytes of I/O memory at @dst with the low
+ * byte of @c.
+ *
+ * Byte writes are used until @dst is 4-byte aligned and for the final
+ * tail; in between, a 32-bit pattern holding the byte in all four lanes
+ * is written one word at a time.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+	/* Replicate the byte into every lane of a 32-bit word. */
+	const u32 pattern = (u8)c * 0x01010101u;
+
+	/* Head: byte accesses up to the first aligned I/O address. */
+	for (; count && !IS_ALIGNED((unsigned long)dst, 4); count--) {
+		__raw_writeb(c, dst);
+		dst++;
+	}
+
+	/* Body: one 32-bit I/O write per iteration. */
+	for (; count >= 4; count -= 4) {
+		__raw_writel(pattern, dst);
+		dst += 4;
+	}
+
+	/* Tail: at most three bytes are left. */
+	for (; count; count--) {
+		__raw_writeb(c, dst);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(__memset_io);
diff --git a/arch/csky/kernel/module.c b/arch/csky/kernel/module.c
index 6cd82d69c655..f11b3e573344 100644
--- a/arch/csky/kernel/module.c
+++ b/arch/csky/kernel/module.c
@@ -68,7 +68,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
*location = rel[i].r_addend + sym->st_value;
break;
case R_CSKY_PC32:
- /* Add the value, subtract its postition */
+ /* Add the value, subtract its position */
*location = rel[i].r_addend + sym->st_value
- (uint32_t)location;
break;
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 42920f25e73c..34ba684d5962 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -30,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv)
struct csky_insn_patch *param = priv;
unsigned int addr = (unsigned int)param->addr;
- if (atomic_inc_return(&param->cpu_count) == 1) {
+ if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
*(u16 *) addr = cpu_to_le16(param->opcode);
dcache_wb_range(addr, addr + 2);
atomic_inc(&param->cpu_count);
diff --git a/arch/csky/kernel/probes/uprobes.c b/arch/csky/kernel/probes/uprobes.c
index 1a9e0961b2b5..2d31a12e46cf 100644
--- a/arch/csky/kernel/probes/uprobes.c
+++ b/arch/csky/kernel/probes/uprobes.c
@@ -102,7 +102,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 3d0ca22cd0e2..5de04707aa07 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -2,7 +2,6 @@
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/debug.h>
diff --git a/arch/csky/lib/Makefile b/arch/csky/lib/Makefile
index 7fbdbb2c4d12..d0ce6e2d7ab2 100644
--- a/arch/csky/lib/Makefile
+++ b/arch/csky/lib/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y := usercopy.o delay.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
+lib-y += string.o
+endif
diff --git a/arch/csky/lib/string.c b/arch/csky/lib/string.c
new file mode 100644
index 000000000000..d65626fcaeac
--- /dev/null
+++ b/arch/csky/lib/string.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * String functions optimized for hardware which doesn't
+ * handle unaligned memory accesses efficiently.
+ *
+ * Copyright (C) 2021 Matteo Croce
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+/* Minimum size for a word copy to be convenient */
+#define BYTES_LONG sizeof(long)
+#define WORD_MASK (BYTES_LONG - 1)
+#define MIN_THRESHOLD (BYTES_LONG * 2)
+
+/*
+ * Convenience unions that view one address as a byte pointer, a word
+ * pointer or a plain integer, avoiding casts between pointer types.
+ *
+ * "types" is for writable buffers. "const_types" is for read-only
+ * sources: both of its pointer views are const-qualified so the union
+ * cannot be used to write through a pointer that was handed in as const.
+ */
+union types {
+	u8 *as_u8;
+	unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
+union const_types {
+	const u8 *as_u8;
+	const unsigned long *as_ulong;
+	uintptr_t as_uptr;
+};
+
+/*
+ * memcpy - copy @count bytes from @src to @dest; returns @dest.
+ *
+ * Copies shorter than MIN_THRESHOLD go straight to the byte loop at
+ * copy_remainder. Otherwise the destination is byte-copied up to a word
+ * boundary and the bulk is moved one unsigned long at a time:
+ *  - if the source is then word aligned too, plain word loads/stores;
+ *  - if not, the source pointer is rounded down to a word boundary and
+ *    each destination word is assembled from two neighbouring source
+ *    words with shifts, so every memory access is word aligned.
+ * The shifted path reads whole aligned source words, so it can touch
+ * bytes of the first/last source word that lie outside
+ * [src, src + count).
+ * NOTE(review): the shift directions (low part from "last >>", high part
+ * from "next <<") look little-endian specific - confirm that no
+ * big-endian configuration builds this file.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ union const_types s = { .as_u8 = src };
+ union types d = { .as_u8 = dest };
+ int distance = 0;
+
+ if (count < MIN_THRESHOLD)
+ goto copy_remainder;
+
+ /* Copy a byte at time until destination is aligned. */
+ for (; d.as_uptr & WORD_MASK; count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ distance = s.as_uptr & WORD_MASK;
+
+ if (distance) {
+ unsigned long last, next;
+
+ /*
+ * s is distance bytes ahead of d, and d just reached
+ * the alignment boundary. Move s backward to word align it
+ * and shift data to compensate for distance, in order to do
+ * word-by-word copy.
+ */
+ s.as_u8 -= distance;
+
+ next = s.as_ulong[0];
+ for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+ last = next;
+ next = s.as_ulong[1];
+
+ d.as_ulong[0] = last >> (distance * 8) |
+ next << ((BYTES_LONG - distance) * 8);
+
+ d.as_ulong++;
+ s.as_ulong++;
+ }
+
+ /* Restore s with the original offset. */
+ s.as_u8 += distance;
+ } else {
+ /*
+ * If the source and dest lower bits are the same, do a simple
+ * 32/64 bit wide copy.
+ */
+ for (; count >= BYTES_LONG; count -= BYTES_LONG)
+ *d.as_ulong++ = *s.as_ulong++;
+ }
+
+copy_remainder:
+ while (count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ return dest;
+}
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * memmove - copy @count bytes from @src to @dest, tolerating overlap;
+ * returns @dest.
+ *
+ * When @dest lies below @src, or the two ranges do not overlap at all,
+ * the work is handed to memcpy(). Otherwise (@dest above @src and
+ * overlapping) the bytes are copied one at a time from the end towards
+ * the start. Nothing is copied when @dest equals @src.
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+	const char *from;
+	char *to;
+
+	if (dest < src || src + count <= dest)
+		return memcpy(dest, src, count);
+
+	/* Same address: nothing to move. */
+	if (!(dest > src))
+		return dest;
+
+	/* Overlapping with dest above src: walk backwards from the end. */
+	from = src + count;
+	to = dest + count;
+	for (; count; count--)
+		*--to = *--from;
+
+	return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+/*
+ * memset - fill the first @count bytes at @s with the byte value of @c;
+ * returns @s.
+ *
+ * As required for memset(), only the low eight bits of @c are used: the
+ * value is reduced to unsigned char before it is replicated into a word.
+ * Without that reduction a value such as -128 (a sign-extended char
+ * 0x80) would leave its upper bits set in the word pattern and the
+ * word-sized stores would write bytes other than 0x80.
+ *
+ * Fills of at least MIN_THRESHOLD bytes byte-fill up to a word boundary
+ * and then store one unsigned long at a time; whatever is left (or the
+ * whole of a short fill) is written byte by byte.
+ */
+void *memset(void *s, int c, size_t count)
+{
+	union types dest = { .as_u8 = s };
+
+	if (count >= MIN_THRESHOLD) {
+		unsigned long cu = (unsigned char)c;
+
+		/* Compose an ulong with 'c' repeated 4/8 times */
+		cu |= cu << 8;
+		cu |= cu << 16;
+		/* Split shift: avoids a shift-count warning on 32 bit machines */
+		cu |= (cu << 16) << 16;
+
+		/* Byte stores until the destination is word aligned */
+		for (; count && dest.as_uptr & WORD_MASK; count--)
+			*dest.as_u8++ = c;
+
+		/* Fill using the largest size allowed */
+		for (; count >= BYTES_LONG; count -= BYTES_LONG)
+			*dest.as_ulong++ = cu;
+	}
+
+	/* fill the remainder */
+	while (count--)
+		*dest.as_u8++ = c;
+
+	return s;
+}
+EXPORT_SYMBOL(memset);
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index c3a775a7e8f9..82447029feb4 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
-#include <linux/version.h>
#include <asm/cache.h>
static inline void cache_op(phys_addr_t paddr, size_t size,