From 819671ff849b07b9831b91de879ddc5da4b333d4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:25 +0100 Subject: syscalls: define and explain goal to not call syscalls in the kernel The syscall entry points to the kernel defined by SYSCALL_DEFINEx() and COMPAT_SYSCALL_DEFINEx() should only be called from userspace through kernel entry points, but not from the kernel itself. This will allow cleanups and optimizations to the entry paths *and* to the parts of the kernel code which currently need to pretend to be userspace in order to make use of syscalls. Signed-off-by: Dominik Brodowski --- Documentation/process/adding-syscalls.rst | 32 +++++++++++++++++++++++++++++++ include/linux/syscalls.h | 7 +++++++ 2 files changed, 39 insertions(+) diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index 8cc25a06f353..556613744556 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -487,6 +487,38 @@ patchset, for the convenience of reviewers. The man page should be cc'ed to linux-man@vger.kernel.org For more details, see https://www.kernel.org/doc/man-pages/patches.html + +Do not call System Calls in the Kernel +-------------------------------------- + +System calls are, as stated above, interaction points between userspace and +the kernel. Therefore, system call functions such as ``sys_xyzzy()`` or +``compat_sys_xyzzy()`` should only be called from userspace via the syscall +table, but not from elsewhere in the kernel. If the syscall functionality is +useful to be used within the kernel, needs to be shared between an old and a +new syscall, or needs to be shared between a syscall and its compatibility +variant, it should be implemented by means of a "helper" function (such as +``kern_xyzzy()``). 
This kernel function may then be called within the +syscall stub (``sys_xyzzy()``), the compatibility syscall stub +(``compat_sys_xyzzy()``), and/or other kernel code. + +At least on 64-bit x86, it will be a hard requirement from v4.17 onwards to not +call system call functions in the kernel. It uses a different calling +convention for system calls where ``struct pt_regs`` is decoded on-the-fly in a +syscall wrapper which then hands processing over to the actual syscall function. +This means that only those parameters which are actually needed for a specific +syscall are passed on during syscall entry, instead of filling in six CPU +registers with random user space content all the time (which may cause serious +trouble down the call chain). + +Moreover, rules on how data may be accessed may differ between kernel data and +user data. This is another reason why calling ``sys_xyzzy()`` is generally a +bad idea. + +Exceptions to this rule are only allowed in architecture-specific overrides, +architecture-specific compatibility wrappers, or other code in arch/. + + References and Sources ---------------------- diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a78186d826d7..0526286a0314 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -941,4 +941,11 @@ asmlinkage long sys_pkey_free(int pkey); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); + +/* + * Kernel code should not call syscalls (i.e., sys_xyzzy()) directly. + * Instead, use one of the functions which work equivalently, such as + * the ksys_xyzzy() functions prototyped below. + */ + #endif -- cgit v1.2.3 From d300b610812f3c10d146db4c18f98eba38834c70 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:26 +0100 Subject: kernel: use kernel_wait4() instead of sys_wait4() All call sites of sys_wait4() set *rusage to NULL. 
Therefore, there is no need for the copy_to_user() handling of *rusage, and we can use kernel_wait4() directly. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Acked-by: Luis R. Rodriguez Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- kernel/exit.c | 2 +- kernel/pid_namespace.c | 6 +++--- kernel/umh.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 995453d9fb55..c3c7ac560114 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1691,7 +1691,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, */ SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { - return sys_wait4(pid, stat_addr, options, NULL); + return kernel_wait4(pid, stat_addr, options, NULL); } #endif diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 0b53eef7d34b..93b57f026688 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -242,16 +242,16 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) /* * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD. - * sys_wait4() will also block until our children traced from the + * kernel_wait4() will also block until our children traced from the * parent namespace are detached and become EXIT_DEAD. */ do { clear_thread_flag(TIF_SIGPENDING); - rc = sys_wait4(-1, NULL, __WALL, NULL); + rc = kernel_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); /* - * sys_wait4() above can't reap the EXIT_DEAD children but we do not + * kernel_wait4() above can't reap the EXIT_DEAD children but we do not * really care, we could reparent them to the global init. 
We could * exit and reap ->child_reaper even if it is not the last thread in * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(), diff --git a/kernel/umh.c b/kernel/umh.c index 18e5fa4b0e71..f76b3ff876cf 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -118,7 +118,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) { pid_t pid; - /* If SIGCLD is ignored sys_wait4 won't populate the status. */ + /* If SIGCLD is ignored kernel_wait4 won't populate the status. */ kernel_sigaction(SIGCHLD, SIG_DFL); pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); if (pid < 0) { @@ -135,7 +135,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) * * Thus the __user pointer cast is valid here. */ - sys_wait4(pid, (int __user *)&ret, 0, NULL); + kernel_wait4(pid, (int __user *)&ret, 0, NULL); /* * If ret is 0, either call_usermodehelper_exec_async failed and -- cgit v1.2.3 From d53238cd51a80f6f2e5b9d64830c62e2086787bd Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:37 +0100 Subject: kernel: open-code sys_rt_sigpending() in sys_sigpending() A similar but not fully equivalent code path is already open-coded three times (in sys_rt_sigpending and in the two compat stubs), so do it a fourth time here. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 2 +- kernel/signal.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0526286a0314..a63e21e7a3af 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -288,7 +288,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data); asmlinkage long sys_personality(unsigned int personality); -asmlinkage long sys_sigpending(old_sigset_t __user *set); +asmlinkage long sys_sigpending(old_sigset_t __user *uset); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, diff --git a/kernel/signal.c b/kernel/signal.c index c6e4c83dc090..985c61749bcf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3629,11 +3629,20 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) /** * sys_sigpending - examine pending signals - * @set: where mask of pending signal is returned + * @uset: where mask of pending signal is returned */ -SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) +SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) { - return sys_rt_sigpending((sigset_t __user *)set, sizeof(old_sigset_t)); + sigset_t set; + int err; + + if (sizeof(old_sigset_t) > sizeof(*uset)) + return -EINVAL; + + err = do_sigpending(&set); + if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t))) + err = -EFAULT; + return err; } #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 6b27aef09fea32b805a8c81287b1bb80362dadb0 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 15:18:30 +0100 Subject: kexec: call do_kexec_load() in compat syscall directly do_kexec_load() can be called directly by compat_sys_kexec_load() as long as the same 
parameter checks are completed which are currently handled (also) by sys_kexec_load(). Therefore, move those to kexec_load_check(), call that newly introduced helper function from both sys_kexec_load() and compat_sys_kexec_load(), and duplicate the remaining code from sys_kexec_load() in compat_sys_kexec_load(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Eric Biederman Cc: kexec@lists.infradead.org Signed-off-by: Dominik Brodowski --- kernel/kexec.c | 52 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index e62ec4dc6620..aed8fb2564b3 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -192,11 +192,9 @@ out: * that to happen you need to do that yourself. */ -SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, - struct kexec_segment __user *, segments, unsigned long, flags) +static inline int kexec_load_check(unsigned long nr_segments, + unsigned long flags) { - int result; - /* We only trust the superuser with rebooting the system. */ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) return -EPERM; @@ -208,17 +206,29 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) return -EINVAL; - /* Verify we are on the appropriate architecture */ - if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && - ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) - return -EINVAL; - /* Put an artificial cap on the number * of segments passed to kexec_load. 
*/ if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; + return 0; +} + +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) +{ + int result; + + result = kexec_load_check(nr_segments, flags); + if (result) + return result; + + /* Verify we are on the appropriate architecture */ + if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && + ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) + return -EINVAL; + /* Because we write directly to the reserved memory * region when loading crash kernels we need a mutex here to * prevent multiple crash kernels from attempting to load @@ -247,15 +257,16 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, struct kexec_segment out, __user *ksegments; unsigned long i, result; + result = kexec_load_check(nr_segments, flags); + if (result) + return result; + /* Don't allow clients that don't understand the native * architecture to do anything. */ if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) return -EINVAL; - if (nr_segments > KEXEC_SEGMENT_MAX) - return -EINVAL; - ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); for (i = 0; i < nr_segments; i++) { result = copy_from_user(&in, &segments[i], sizeof(in)); @@ -272,6 +283,21 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, return -EFAULT; } - return sys_kexec_load(entry, nr_segments, ksegments, flags); + /* Because we write directly to the reserved memory + * region when loading crash kernels we need a mutex here to + * prevent multiple crash kernels from attempting to load + * simultaneously, and to prevent a crash kernel from loading + * over the top of a in use crash kernel. + * + * KISS: always take the mutex. 
+ */ + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + + result = do_kexec_load(entry, nr_segments, ksegments, flags); + + mutex_unlock(&kexec_mutex); + + return result; } #endif -- cgit v1.2.3 From 2de0db992de189fccc83fed57c30875144821491 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:26 +0100 Subject: mm: use do_futex() instead of sys_futex() in mm_release() sys_futex() is a wrapper to do_futex() which does not modify any values here: - uaddr, val and val3 are kept the same - op is masked with FUTEX_CMD_MASK, but is always set to FUTEX_WAKE. Therefore, val2 is always 0. - as utime is set to NULL, *timeout is NULL This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Darren Hart Cc: Andrew Morton Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- include/linux/futex.h | 13 ++++++++++--- kernel/fork.c | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index c0fb9a24bbd2..821ae502d3d8 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -9,9 +9,6 @@ struct inode; struct mm_struct; struct task_struct; -long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3); - extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); @@ -55,6 +52,9 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); #ifdef CONFIG_HAVE_FUTEX_CMPXCHG #define futex_cmpxchg_enabled 1 #else @@ -64,6 +64,13 @@ extern int futex_cmpxchg_enabled; static inline void exit_robust_list(struct task_struct *curr) { } + +static inline long 
do_futex(u32 __user *uaddr, int op, u32 val, + ktime_t *timeout, u32 __user *uaddr2, + u32 val2, u32 val3) +{ + return -EINVAL; +} #endif #ifdef CONFIG_FUTEX_PI diff --git a/kernel/fork.c b/kernel/fork.c index e5d9d405ae4e..b1e031aac9db 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1198,8 +1198,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * not set up a proper pointer then tough luck. */ put_user(0, tsk->clear_child_tid); - sys_futex(tsk->clear_child_tid, FUTEX_WAKE, - 1, NULL, NULL, 0); + do_futex(tsk->clear_child_tid, FUTEX_WAKE, + 1, NULL, NULL, 0, 0); } tsk->clear_child_tid = NULL; } -- cgit v1.2.3 From 37db219ef9ccff280c83f10e5d7c1e5a1b6abc1b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Mar 2018 08:11:26 +0100 Subject: x86: use _do_fork() in compat_sys_x86_clone() It is trivial to directly call _do_fork() instead of the sys_clone() syscall in compat_sys_x86_clone(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/ia32/sys_ia32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 6512498bbef6..aa024a2dd34f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -41,6 +41,7 @@ #include <linux/highuid.h> #include <linux/sysctl.h> #include <linux/slab.h> +#include <linux/sched/task.h> #include <asm/mman.h> #include <asm/types.h> #include <linux/uaccess.h> @@ -241,6 +242,6 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, unsigned long, tls_val, int __user *, child_tidptr) { - return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, + return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls_val); } -- cgit v1.2.3 From b51d3cdf44d5ca9000de1e3d64551337d67b1cad Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Mar 2018 08:03:01 +0100 Subject: x86: remove compat_sys_x86_waitpid() compat_sys_x86_waitpid() is not needed, as it takes the same parameters (int, *int, int) as the native syscall. 
Suggested-by: Al Viro Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/ia32/sys_ia32.c | 6 ------ arch/x86/include/asm/sys_ia32.h | 3 --- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2a5e99cff859..ef6edaf285cd 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -13,7 +13,7 @@ 4 i386 write sys_write 5 i386 open sys_open compat_sys_open 6 i386 close sys_close -7 i386 waitpid sys_waitpid compat_sys_x86_waitpid +7 i386 waitpid sys_waitpid 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index aa024a2dd34f..23a5260eae67 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -169,12 +169,6 @@ COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) a.offset>>PAGE_SHIFT); } -COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *, - stat_addr, int, options) -{ - return compat_sys_wait4(pid, stat_addr, options, NULL); -} - /* warning: next two assume little endian */ COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, u32, count, u32, poslo, u32, poshi) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 906794aa034e..2ee6e3b96656 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -35,9 +35,6 @@ asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, struct mmap_arg_struct32; asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, - int); - asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long compat_sys_x86_pwrite(unsigned int, 
const char __user *, u32, -- cgit v1.2.3 From 7a09e1eb9c1e5179797e0f3341ba7315c7626a0c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:10:06 +0100 Subject: net: socket: add __sys_recvfrom() helper; remove in-kernel call to syscall Using the net-internal helper __sys_recvfrom() allows us to avoid the internal calls to the sys_recvfrom() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 6 ++++++ net/compat.c | 3 ++- net/socket.c | 21 +++++++++++++-------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 9286a5a8c60c..40cc93b91628 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -353,4 +353,10 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags); + +/* helpers which do the actual work for syscalls */ +extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned int flags, struct sockaddr __user *addr, + int __user *addr_len); + #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 22381719718c..2d8186c277b2 100644 --- a/net/compat.c +++ b/net/compat.c @@ -760,7 +760,8 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len unsigned int, flags, struct sockaddr __user *, addr, int __user *, addrlen) { - return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); + return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, + addrlen); } COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, 
mmsg, diff --git a/net/socket.c b/net/socket.c index a93c99b518ca..712d99d8680f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1767,10 +1767,8 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, * sender. We verify the buffers are writable and if needed move the * sender address from kernel to user space. */ - -SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, - unsigned int, flags, struct sockaddr __user *, addr, - int __user *, addr_len) +int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, + struct sockaddr __user *addr, int __user *addr_len) { struct socket *sock; struct iovec iov; @@ -1810,6 +1808,13 @@ out: return err; } +SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, + unsigned int, flags, struct sockaddr __user *, addr, + int __user *, addr_len) +{ + return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len); +} + /* * Receive a datagram from a socket. */ @@ -1817,7 +1822,7 @@ out: SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, unsigned int, flags) { - return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); + return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } /* @@ -2486,9 +2491,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = sys_recv(a0, (void __user *)a1, a[2], a[3]); break; case SYS_RECVFROM: - err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], - (int __user *)a[5]); + err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], + (int __user *)a[5]); break; case SYS_SHUTDOWN: err = sys_shutdown(a0, a1); -- cgit v1.2.3 From 211b634b7f1ebc3436d5e8a34810a8eaa1f269d9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:18:52 +0100 Subject: net: socket: add __sys_sendto() helper; remove in-kernel call to syscall Using the net-internal helper __sys_sendto() allows us to avoid the internal calls to the sys_sendto() syscall. 
This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 3 +++ net/compat.c | 3 ++- net/socket.c | 19 ++++++++++++------- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 40cc93b91628..54b85abc7265 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -358,5 +358,8 @@ extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len); +extern int __sys_sendto(int fd, void __user *buff, size_t len, + unsigned int flags, struct sockaddr __user *addr, + int addr_len); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 2d8186c277b2..fc82982d9b84 100644 --- a/net/compat.c +++ b/net/compat.c @@ -838,7 +838,8 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = sys_send(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_SENDTO: - ret = sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); + ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4]), a[5]); break; case SYS_RECV: ret = compat_sys_recv(a0, compat_ptr(a1), a[2], a[3]); diff --git a/net/socket.c b/net/socket.c index 712d99d8680f..3f037a21ba5e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1711,10 +1711,8 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, * space and check the user space data area is readable before invoking * the protocol. 
*/ - -SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, - unsigned int, flags, struct sockaddr __user *, addr, - int, addr_len) +int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, + struct sockaddr __user *addr, int addr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1752,6 +1750,13 @@ out: return err; } +SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned int, flags, struct sockaddr __user *, addr, + int, addr_len) +{ + return __sys_sendto(fd, buff, len, flags, addr, addr_len); +} + /* * Send a datagram down a socket. */ @@ -1759,7 +1764,7 @@ out: SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, unsigned int, flags) { - return sys_sendto(fd, buff, len, flags, NULL, 0); + return __sys_sendto(fd, buff, len, flags, NULL, 0); } /* @@ -2484,8 +2489,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = sys_send(a0, (void __user *)a1, a[2], a[3]); break; case SYS_SENDTO: - err = sys_sendto(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], a[5]); + err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], a[5]); break; case SYS_RECV: err = sys_recv(a0, (void __user *)a1, a[2], a[3]); -- cgit v1.2.3 From 4541e80560ca56d63348f40f2e34b044a5eb3dd7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:24:23 +0100 Subject: net: socket: add __sys_accept4() helper; remove in-kernel call to syscall Using the net-internal helper __sys_accept4() allows us to avoid the internal calls to the sys_accept4() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ net/compat.c | 4 ++-- net/socket.c | 20 +++++++++++++------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 54b85abc7265..6a9840271676 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -361,5 +361,7 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); +extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index fc82982d9b84..0ff9f7451b6f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -823,7 +823,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); + ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -873,7 +873,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) compat_ptr(a[4])); break; case SYS_ACCEPT4: - ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); + ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; diff --git a/net/socket.c b/net/socket.c index 3f037a21ba5e..45f6ea0d57a5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1519,8 +1519,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) * clean when we restucture accept also. 
*/ -SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, - int __user *, upeer_addrlen, int, flags) +int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1599,10 +1599,16 @@ out_fd: goto out_put; } +SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen, int, flags) +{ + return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags); +} + SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, int __user *, upeer_addrlen) { - return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); + return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } /* @@ -2469,8 +2475,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = sys_listen(a0, a1); break; case SYS_ACCEPT: - err = sys_accept4(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], 0); + err = __sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2525,8 +2531,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (struct timespec __user *)a[4]); break; case SYS_ACCEPT4: - err = sys_accept4(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], a[3]); + err = __sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], a[3]); break; default: err = -EINVAL; -- cgit v1.2.3 From 9d6a15c3f23bcf0b0e6c6efd7d19f52d960a8697 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:29:43 +0100 Subject: net: socket: add __sys_socket() helper; remove in-kernel call to syscall Using the net-internal helper __sys_socket() allows us to avoid the internal calls to the sys_socket() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + net/compat.c | 2 +- net/socket.c | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 6a9840271676..f8d040434a13 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -363,5 +363,6 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, int addr_len); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); +extern int __sys_socket(int family, int type, int protocol); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 0ff9f7451b6f..5b3b74c5812e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -811,7 +811,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) switch (call) { case SYS_SOCKET: - ret = sys_socket(a0, a1, a[2]); + ret = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: ret = sys_bind(a0, compat_ptr(a1), a[2]); diff --git a/net/socket.c b/net/socket.c index 45f6ea0d57a5..07f379e50def 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1332,7 +1332,7 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct } EXPORT_SYMBOL(sock_create_kern); -SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) +int __sys_socket(int family, int type, int protocol) { int retval; struct socket *sock; @@ -1359,6 +1359,11 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); } +SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) +{ + return __sys_socket(family, type, protocol); +} + /* * Create a pair of connected sockets. 
*/ @@ -2463,7 +2468,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) switch (call) { case SYS_SOCKET: - err = sys_socket(a0, a1, a[2]); + err = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); -- cgit v1.2.3 From a87d35d87a3e4f2a0b0968d1f06703c909138b62 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:33:09 +0100 Subject: net: socket: add __sys_bind() helper; remove in-kernel call to syscall Using the net-internal helper __sys_bind() allows us to avoid the internal calls to the sys_bind() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + net/compat.c | 2 +- net/socket.c | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index f8d040434a13..e9cee272da13 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -364,5 +364,6 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); +extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 5b3b74c5812e..bba555b1d863 100644 --- a/net/compat.c +++ b/net/compat.c @@ -814,7 +814,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: - ret = sys_bind(a0, compat_ptr(a1), a[2]); + ret = __sys_bind(a0, compat_ptr(a1), a[2]); break; case SYS_CONNECT: ret = sys_connect(a0, compat_ptr(a1), a[2]); diff --git 
a/net/socket.c b/net/socket.c index 07f379e50def..291cdae97341 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1462,7 +1462,7 @@ out: * the protocol layer (having also checked the address is ok). */ -SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) +int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1485,6 +1485,11 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) return err; } +SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) +{ + return __sys_bind(fd, umyaddr, addrlen); +} + /* * Perform a listen. Basically, we allow the protocol to do anything * necessary for a listen, and if that works, we mark the socket as @@ -2471,7 +2476,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: - err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); + err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]); break; case SYS_CONNECT: err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); -- cgit v1.2.3 From 1387c2c2f988f8180c6189d5083eaeeb8f120d44 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:35:09 +0100 Subject: net: socket: add __sys_connect() helper; remove in-kernel call to syscall Using the net-internal helper __sys_connect() allows us to avoid the internal calls to the sys_connect() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ net/compat.c | 2 +- net/socket.c | 11 ++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index e9cee272da13..7daa344d7320 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -365,5 +365,7 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index bba555b1d863..7ab6352268f3 100644 --- a/net/compat.c +++ b/net/compat.c @@ -817,7 +817,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_bind(a0, compat_ptr(a1), a[2]); break; case SYS_CONNECT: - ret = sys_connect(a0, compat_ptr(a1), a[2]); + ret = __sys_connect(a0, compat_ptr(a1), a[2]); break; case SYS_LISTEN: ret = sys_listen(a0, a1); diff --git a/net/socket.c b/net/socket.c index 291cdae97341..64bdfdf6c6e7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1633,8 +1633,7 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, - int, addrlen) +int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1660,6 +1659,12 @@ out: return err; } +SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, + int, addrlen) +{ + return __sys_connect(fd, uservaddr, addrlen); +} + /* * Get the local address ('name') of a socket object. Move the obtained * name to user space. 
@@ -2479,7 +2484,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]); break; case SYS_CONNECT: - err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); + err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]); break; case SYS_LISTEN: err = sys_listen(a0, a1); -- cgit v1.2.3 From 25e290eed9c653cc90ac675d64b30b66cffce82f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:36:54 +0100 Subject: net: socket: add __sys_listen() helper; remove in-kernel call to syscall Using the net-internal helper __sys_listen() allows us to avoid the internal calls to the sys_listen() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + net/compat.c | 2 +- net/socket.c | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 7daa344d7320..7e37af25509d 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -367,5 +367,6 @@ extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); +extern int __sys_listen(int fd, int backlog); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 7ab6352268f3..c80cb973f383 100644 --- a/net/compat.c +++ b/net/compat.c @@ -820,7 +820,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_connect(a0, compat_ptr(a1), a[2]); break; case SYS_LISTEN: - ret = sys_listen(a0, a1); + ret = __sys_listen(a0, a1); break; case SYS_ACCEPT: ret = __sys_accept4(a0, compat_ptr(a1), 
compat_ptr(a[2]), 0); diff --git a/net/socket.c b/net/socket.c index 64bdfdf6c6e7..67d9d70a4734 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1496,7 +1496,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) * ready for listening. */ -SYSCALL_DEFINE2(listen, int, fd, int, backlog) +int __sys_listen(int fd, int backlog) { struct socket *sock; int err, fput_needed; @@ -1517,6 +1517,11 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) return err; } +SYSCALL_DEFINE2(listen, int, fd, int, backlog) +{ + return __sys_listen(fd, backlog); +} + /* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new @@ -2487,7 +2492,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]); break; case SYS_LISTEN: - err = sys_listen(a0, a1); + err = __sys_listen(a0, a1); break; case SYS_ACCEPT: err = __sys_accept4(a0, (struct sockaddr __user *)a1, -- cgit v1.2.3 From 8882a107b3062c6222cdbeadb284ea054ae50a3f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:43:14 +0100 Subject: net: socket: add __sys_getsockname() helper; remove in-kernel call to syscall Using the net-internal helper __sys_getsockname() allows us to avoid the internal calls to the sys_getsockname() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ net/compat.c | 2 +- net/socket.c | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 7e37af25509d..ef0226a61b03 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -368,5 +368,7 @@ extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index c80cb973f383..efd28d02608c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -826,7 +826,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: - ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); + ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_GETPEERNAME: ret = sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); diff --git a/net/socket.c b/net/socket.c index 67d9d70a4734..b61e0d20f37b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1675,8 +1675,8 @@ SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, * name to user space. 
*/ -SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, - int __user *, usockaddr_len) +int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1701,6 +1701,12 @@ out: return err; } +SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) +{ + return __sys_getsockname(fd, usockaddr, usockaddr_len); +} + /* * Get the remote address ('name') of a socket object. Move the obtained * name to user space. @@ -2500,8 +2506,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) break; case SYS_GETSOCKNAME: err = - sys_getsockname(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + __sys_getsockname(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); break; case SYS_GETPEERNAME: err = -- cgit v1.2.3 From b21c8f838a0e5b84c0d78fd9ed6b9f2cf97afbe9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:47:00 +0100 Subject: net: socket: add __sys_getpeername() helper; remove in-kernel call to syscall Using the net-internal helper __sys_getpeername() allows us to avoid the internal calls to the sys_getpeername() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ net/compat.c | 2 +- net/socket.c | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index ef0226a61b03..9ba003e92fea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -370,5 +370,7 @@ extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, extern int __sys_listen(int fd, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); +extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index efd28d02608c..74017f618eb1 100644 --- a/net/compat.c +++ b/net/compat.c @@ -829,7 +829,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_GETPEERNAME: - ret = sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); + ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_SOCKETPAIR: ret = sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); diff --git a/net/socket.c b/net/socket.c index b61e0d20f37b..007fb9483279 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1712,8 +1712,8 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, * name to user space. 
*/ -SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, - int __user *, usockaddr_len) +int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1738,6 +1738,12 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, return err; } +SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) +{ + return __sys_getpeername(fd, usockaddr, usockaddr_len); +} + /* * Send a datagram to a given address. We move the address into kernel * space and check the user space data area is readable before invoking @@ -2511,8 +2517,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) break; case SYS_GETPEERNAME: err = - sys_getpeername(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + __sys_getpeername(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); break; case SYS_SOCKETPAIR: err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); -- cgit v1.2.3 From 6debc8d834ebc73566e5255c565c8fb307be22c5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:49:23 +0100 Subject: net: socket: add __sys_socketpair() helper; remove in-kernel call to syscall Using the net-internal helper __sys_socketpair() allows us to avoid the internal calls to the sys_socketpair() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ net/compat.c | 2 +- net/socket.c | 11 ++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 9ba003e92fea..dbdddf0d079e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -372,5 +372,7 @@ extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); +extern int __sys_socketpair(int family, int type, int protocol, + int __user *usockvec); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 74017f618eb1..04db26316438 100644 --- a/net/compat.c +++ b/net/compat.c @@ -832,7 +832,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_SOCKETPAIR: - ret = sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); + ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); break; case SYS_SEND: ret = sys_send(a0, compat_ptr(a1), a[2], a[3]); diff --git a/net/socket.c b/net/socket.c index 007fb9483279..5861821f46f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1368,8 +1368,7 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) * Create a pair of connected sockets. */ -SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, - int __user *, usockvec) +int __sys_socketpair(int family, int type, int protocol, int __user *usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1454,6 +1453,12 @@ out: return err; } +SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, + int __user *, usockvec) +{ + return __sys_socketpair(family, type, protocol, usockvec); +} + /* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. 
@@ -2521,7 +2526,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[2]); break; case SYS_SOCKETPAIR: - err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); + err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]); break; case SYS_SEND: err = sys_send(a0, (void __user *)a1, a[2], a[3]); -- cgit v1.2.3 From 005a1aeac46666e2805f316596893ebd2a3c12e7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:07:05 +0100 Subject: net: socket: add __sys_shutdown() helper; remove in-kernel call to syscall Using the net-internal helper __sys_shutdown() allows us to avoid the internal calls to the sys_shutdown() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + net/compat.c | 2 +- net/socket.c | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index dbdddf0d079e..b205138b69f1 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -374,5 +374,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown(int fd, int how); #endif /* _LINUX_SOCKET_H */ diff --git a/net/compat.c b/net/compat.c index 04db26316438..f1ec23e9dfce 100644 --- a/net/compat.c +++ b/net/compat.c @@ -849,7 +849,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0, a1); + ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/socket.c b/net/socket.c index 
5861821f46f5..ad5dfd6a1d59 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1942,7 +1942,7 @@ out_put: * Shutdown a socket. */ -SYSCALL_DEFINE2(shutdown, int, fd, int, how) +int __sys_shutdown(int fd, int how) { int err, fput_needed; struct socket *sock; @@ -1957,6 +1957,11 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) return err; } +SYSCALL_DEFINE2(shutdown, int, fd, int, how) +{ + return __sys_shutdown(fd, how); +} + /* A couple of helpful macros for getting the address of the 32/64 bit * fields which are the same type (int / unsigned) on our platforms. */ @@ -2544,7 +2549,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[5]); break; case SYS_SHUTDOWN: - err = sys_shutdown(a0, a1); + err = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); -- cgit v1.2.3 From cc36dca0dffad991135d0e28938ba2b4c0b786f7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:10:59 +0100 Subject: net: socket: add __sys_setsockopt() helper; remove in-kernel call to syscall Using the net-internal helper __sys_setsockopt() allows us to avoid the internal calls to the sys_setsockopt() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + net/socket.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index b205138b69f1..cad120e4ed4b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -376,4 +376,5 @@ extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); extern int __sys_shutdown(int fd, int how); + #endif /* _LINUX_SOCKET_H */ diff --git a/net/socket.c b/net/socket.c index ad5dfd6a1d59..5dd2e39a6cd4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1878,8 +1878,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, * to pass the user mode parameter for the protocols to sort out. */ -SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, - char __user *, optval, int, optlen) +static int __sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) { int err, fput_needed; struct socket *sock; @@ -1907,6 +1907,12 @@ out_put: return err; } +SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, int, optlen) +{ + return __sys_setsockopt(fd, level, optname, optval, optlen); +} + /* * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. 
@@ -2552,7 +2558,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: - err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); + err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3], + a[4]); break; case SYS_GETSOCKOPT: err = -- cgit v1.2.3 From 13a2d70e2b5218dde4b551a9b7adc5a11e359a05 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:15:04 +0100 Subject: net: socket: add __sys_getsockopt() helper; remove in-kernel call to syscall Using the net-internal helper __sys_getsockopt() allows us to avoid the internal calls to the sys_getsockopt() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/socket.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index 5dd2e39a6cd4..a05289b1f863 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1918,8 +1918,8 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, * to pass a user mode parameter for the protocols to sort out. */ -SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, - char __user *, optval, int __user *, optlen) +static int __sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen) { int err, fput_needed; struct socket *sock; @@ -1944,6 +1944,12 @@ out_put: return err; } +SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) +{ + return __sys_getsockopt(fd, level, optname, optval, optlen); +} + /* * Shutdown a socket. 
*/ @@ -2563,8 +2569,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) break; case SYS_GETSOCKOPT: err = - sys_getsockopt(a0, a1, a[2], (char __user *)a[3], - (int __user *)a[4]); + __sys_getsockopt(a0, a1, a[2], (char __user *)a[3], + (int __user *)a[4]); break; case SYS_SENDMSG: err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]); -- cgit v1.2.3 From 1255e2690689ba5b72814c8df7bfd9ecf50175aa Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:44:21 +0100 Subject: net: socket: add do_sys_recvmmsg() helper; remove in-kernel call to syscall Using the net-internal helper do_sys_recvmmsg() allows us to avoid the internal calls to the sys_recvmmsg() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/socket.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/socket.c b/net/socket.c index a05289b1f863..54d19b0edab1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2445,9 +2445,9 @@ out_put: return datagrams; } -SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, - unsigned int, vlen, unsigned int, flags, - struct timespec __user *, timeout) +static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct timespec __user *timeout) { int datagrams; struct timespec timeout_sys; @@ -2470,6 +2470,13 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, return datagrams; } +SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct timespec __user *, timeout) +{ + return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout); +} + #ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for
sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) @@ -2582,8 +2589,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]); break; case SYS_RECVMMSG: - err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], - (struct timespec __user *)a[4]); + err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], + a[3], (struct timespec __user *)a[4]); break; case SYS_ACCEPT4: err = __sys_accept4(a0, (struct sockaddr __user *)a1, -- cgit v1.2.3 From e1834a329d6bb5659c14e9e537bd1f750fe3b85e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:35:57 +0100 Subject: net: socket: move check for forbid_cmsg_compat to __sys_...msg() The non-compat codepaths for sys_...msg() verify that MSG_CMSG_COMPAT is not set. By moving this check to the __sys_...msg() functions (and making it dependent on a static flag passed to these functions), we can call the __sys_...msg() functions instead of the syscall functions in all cases. __sys_recvmmsg() does not need this trickery, as the check is handled within the do_sys_recvmmsg() function internal to net/socket.c. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S.
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 13 +++++++++---- net/compat.c | 8 +++++--- net/socket.c | 38 +++++++++++++++++++++++--------------- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index cad120e4ed4b..e2b6bd4fe977 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -346,13 +346,18 @@ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec; -/* The __sys_...msg variants allow MSG_CMSG_COMPAT */ -extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff + * forbid_cmsg_compat==false + */ +extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags); + unsigned int vlen, unsigned int flags, + bool forbid_cmsg_compat); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, diff --git a/net/compat.c b/net/compat.c index f1ec23e9dfce..5caa48987bb2 100644 --- a/net/compat.c +++ b/net/compat.c @@ -736,19 +736,21 @@ static unsigned char nas[21] = { COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, + flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct 
compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, - flags | MSG_CMSG_COMPAT); + flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, + flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) diff --git a/net/socket.c b/net/socket.c index 54d19b0edab1..a70793a7ce78 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2137,12 +2137,16 @@ out_freeiov: * BSD sendmsg interface */ -long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags) +long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, + bool forbid_cmsg_compat) { int fput_needed, err; struct msghdr msg_sys; struct socket *sock; + if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2156,9 +2160,7 @@ out: SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - return __sys_sendmsg(fd, msg, flags); + return __sys_sendmsg(fd, msg, flags, true); } /* @@ -2166,7 +2168,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int */ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, - unsigned int flags) + unsigned int flags, bool forbid_cmsg_compat) { int fput_needed, err, datagrams; struct socket *sock; @@ -2176,6 +2178,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct used_address used_address; unsigned int oflags = flags; + if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) + return -EINVAL; + if (vlen > UIO_MAXIOV) vlen = 
UIO_MAXIOV; @@ -2232,9 +2237,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - return __sys_sendmmsg(fd, mmsg, vlen, flags); + return __sys_sendmmsg(fd, mmsg, vlen, flags, true); } static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, @@ -2307,12 +2310,16 @@ out_freeiov: * BSD recvmsg interface */ -long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags) +long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, + bool forbid_cmsg_compat) { int fput_needed, err; struct msghdr msg_sys; struct socket *sock; + if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2327,9 +2334,7 @@ out: SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - return __sys_recvmsg(fd, msg, flags); + return __sys_recvmsg(fd, msg, flags, true); } /* @@ -2580,13 +2585,16 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[4]); break; case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]); + err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1, + a[2], true); break; case SYS_SENDMMSG: - err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); + err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], + a[3], true); break; case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]); + err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1, + a[2], true); break; case SYS_RECVMMSG: err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], -- cgit v1.2.3 From f3bf896b1d041153ea1c6cfa58f6f4658c91089f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 
2018 19:52:00 +0100 Subject: net: socket: replace calls to sys_send() with __sys_sendto() sys_send() merely expands the parameters to __sys_sendto() by NULL and 0. Open-code this in the two places which used sys_send() as a wrapper to __sys_sendto(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 2 +- net/socket.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index 5caa48987bb2..d55982ff5c59 100644 --- a/net/compat.c +++ b/net/compat.c @@ -837,7 +837,7 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); break; case SYS_SEND: - ret = sys_send(a0, compat_ptr(a1), a[2], a[3]); + ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], NULL, 0); break; case SYS_SENDTO: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], diff --git a/net/socket.c b/net/socket.c index a70793a7ce78..92de21bb1a2e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2558,7 +2558,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]); break; case SYS_SEND: - err = sys_send(a0, (void __user *)a1, a[2], a[3]); + err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], + NULL, 0); break; case SYS_SENDTO: err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], -- cgit v1.2.3 From d27e9afc64206b5a2fe02561716cee5be9aacf01 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:54:17 +0100 Subject: net: socket: replace call to sys_recv() with __sys_recvfrom() sys_recv() merely expands the parameters to __sys_recvfrom() by NULL and NULL. Open-code this in the two places which used sys_recv() as a wrapper to __sys_recvfrom(). 
This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 3 ++- net/socket.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index d55982ff5c59..9e0d030063ad 100644 --- a/net/compat.c +++ b/net/compat.c @@ -755,7 +755,8 @@ COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, uns COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { - return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT); + return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, NULL, + NULL); } COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, diff --git a/net/socket.c b/net/socket.c index 92de21bb1a2e..03702f08aa62 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2566,7 +2566,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (struct sockaddr __user *)a[4], a[5]); break; case SYS_RECV: - err = sys_recv(a0, (void __user *)a1, a[2], a[3]); + err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], + NULL, NULL); break; case SYS_RECVFROM: err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], -- cgit v1.2.3 From fd4e82f5b8612ce90bf85f062f2b7954044a1db7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 16:48:34 +0100 Subject: net: socket: add __compat_sys_recvfrom() helper; remove in-kernel call to compat syscall Using the net-internal helper __compat_sys_recvfrom() allows us to avoid the internal calls to the compat_sys_recvfrom() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/compat.c b/net/compat.c index 9e0d030063ad..513adc8d0e0f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -753,18 +753,25 @@ COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, uns flags | MSG_CMSG_COMPAT, false); } +static inline long __compat_sys_recvfrom(int fd, void __user *buf, + compat_size_t len, unsigned int flags, + struct sockaddr __user *addr, + int __user *addrlen) +{ + return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, + addrlen); +} + COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { - return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, NULL, - NULL); + return __compat_sys_recvfrom(fd, buf, len, flags, NULL, NULL); } COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int __user *, addrlen) { - return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, - addrlen); + return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen); } COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, @@ -845,11 +852,13 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) compat_ptr(a[4]), a[5]); break; case SYS_RECV: - ret = compat_sys_recv(a0, compat_ptr(a1), a[2], a[3]); + ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], + NULL, NULL); break; case SYS_RECVFROM: - ret = compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], - compat_ptr(a[4]), compat_ptr(a[5])); + ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4]), + compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = __sys_shutdown(a0, a1); -- cgit v1.2.3 From 
73ee3eafd5d1037af9762d28d17611bcf64834d6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 16:58:54 +0100 Subject: net: socket: add __compat_sys_setsockopt() helper; remove in-kernel call to compat syscall Using the net-internal helper __compat_sys_setsockopt() allows us to avoid the internal calls to the compat_sys_setsockopt() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/compat.c b/net/compat.c index 513adc8d0e0f..75bfcbbb2e3e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -383,8 +383,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname, return sock_setsockopt(sock, level, optname, optval, optlen); } -COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, - char __user *, optval, unsigned int, optlen) +static int __compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, unsigned int optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -410,6 +410,12 @@ COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return err; } +COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, unsigned int, optlen) +{ + return __compat_sys_setsockopt(fd, level, optname, optval, optlen); +} + static int do_get_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -864,8 +870,8 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: - ret = compat_sys_setsockopt(a0, a1, a[2], - compat_ptr(a[3]), a[4]); + ret = __compat_sys_setsockopt(a0, a1, 
a[2], + compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: ret = compat_sys_getsockopt(a0, a1, a[2], -- cgit v1.2.3 From 8770cf4a5858c4f7c109a3e47dfd3715fda45f2c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 17:01:41 +0100 Subject: net: socket: add __compat_sys_getsockopt() helper; remove in-kernel call to compat syscall Using the net-internal helper __compat_sys_getsockopt() allows us to avoid the internal calls to the compat_sys_getsockopt() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/compat.c b/net/compat.c index 75bfcbbb2e3e..cdf5b0c1b962 100644 --- a/net/compat.c +++ b/net/compat.c @@ -509,8 +509,9 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta } EXPORT_SYMBOL(compat_sock_get_timestampns); -COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, - char __user *, optval, int __user *, optlen) +static int __compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, + int __user *optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -536,6 +537,12 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, return err; } +COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) +{ + return __compat_sys_getsockopt(fd, level, optname, optval, optlen); +} + struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group @@ -874,8 +881,9 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: - ret = compat_sys_getsockopt(a0, a1, 
a[2], - compat_ptr(a[3]), compat_ptr(a[4])); + ret = __compat_sys_getsockopt(a0, a1, a[2], + compat_ptr(a[3]), + compat_ptr(a[4])); break; case SYS_SENDMSG: ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); -- cgit v1.2.3 From 157b334aa84dc5a4edcbf056b7254263e4112ba8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 17:10:50 +0100 Subject: net: socket: add __compat_sys_recvmmsg() helper; remove in-kernel call to compat syscall Using the net-internal helper __compat_sys_recvmmsg() allows us to avoid the internal calls to the compat_sys_recvmmsg() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/compat.c b/net/compat.c index cdf5b0c1b962..7b2ae42a1598 100644 --- a/net/compat.c +++ b/net/compat.c @@ -787,9 +787,9 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen); } -COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, - unsigned int, vlen, unsigned int, flags, - struct compat_timespec __user *, timeout) +static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct compat_timespec __user *timeout) { int datagrams; struct timespec ktspec; @@ -809,6 +809,13 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, return datagrams; } +COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct compat_timespec __user *, timeout) +{ + return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout); +} + 
COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { u32 a[AUDITSC_ARGS]; @@ -895,8 +902,8 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; case SYS_RECVMMSG: - ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], - compat_ptr(a[4])); + ret = __compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4])); break; case SYS_ACCEPT4: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); -- cgit v1.2.3 From 6df354653e8cc07be1f057d9207e1092c0b3963b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 17:07:03 +0100 Subject: net: socket: add __compat_sys_...msg() helpers; remove in-kernel calls to compat syscalls Using the net-internal helpers __compat_sys_...msg() allows us to avoid the internal calls to the compat_sys_...msg() syscalls. compat_sys_recvmmsg() is handled in a different patch. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- net/compat.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/net/compat.c b/net/compat.c index 7b2ae42a1598..5ae7437d3853 100644 --- a/net/compat.c +++ b/net/compat.c @@ -747,25 +747,48 @@ static unsigned char nas[21] = { }; #undef AL -COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) +static inline long __compat_sys_sendmsg(int fd, + struct compat_msghdr __user *msg, + unsigned int flags) { return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } -COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, - unsigned int, vlen, unsigned int, flags) +COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, + unsigned int, flags) +{ + return __compat_sys_sendmsg(fd, msg, flags); +} + +static inline long __compat_sys_sendmmsg(int fd, + struct compat_mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags) { return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, false); } -COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) +COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) +{ + return __compat_sys_sendmmsg(fd, mmsg, vlen, flags); +} + +static inline long __compat_sys_recvmsg(int fd, + struct compat_msghdr __user *msg, + unsigned int flags) { return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } +COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, + unsigned int, flags) +{ + return __compat_sys_recvmsg(fd, msg, flags); +} + static inline long __compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned int flags, struct sockaddr __user *addr, @@ -893,13 +916,13 @@ 
COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) compat_ptr(a[4])); break; case SYS_SENDMSG: - ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); + ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; case SYS_SENDMMSG: - ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); + ret = __compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVMSG: - ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); + ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; case SYS_RECVMMSG: ret = __compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], -- cgit v1.2.3 From 41f4f0e2f5f4cd060885405c04214851ffe7b299 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:48:14 +0100 Subject: ipc: add semtimedop syscall/compat_syscall wrappers Provide ksys_semtimedop() and compat_ksys_semtimedop() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_semtimedop() and compat_sys_semtimedop(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/sem.c | 23 ++++++++++++++++++----- ipc/syscall.c | 17 ++++++++++------- ipc/util.h | 13 +++++++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index a4af04979fd2..e21ceb8b4af1 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2120,8 +2120,8 @@ out_free: return error; } -SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, - unsigned, nsops, const struct timespec __user *, timeout) +long ksys_semtimedop(int semid, struct sembuf __user *tsops, + unsigned int nsops, const struct timespec __user *timeout) { if (timeout) { struct timespec64 ts; @@ -2132,10 +2132,16 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, return do_semtimedop(semid, tsops, nsops, NULL); } +SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, + unsigned int, nsops, const struct timespec __user *, timeout) +{ + return ksys_semtimedop(semid, tsops, nsops, timeout); +} + #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, - unsigned, nsops, - const struct compat_timespec __user *, timeout) +long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, + unsigned int nsops, + const struct compat_timespec __user *timeout) { if (timeout) { struct timespec64 ts; @@ -2145,6 +2151,13 @@ COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, } return do_semtimedop(semid, tsems, nsops, NULL); } + +COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, + unsigned int, nsops, + const struct compat_timespec __user *, timeout) +{ + return compat_ksys_semtimedop(semid, tsems, nsops, timeout); +} #endif SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, diff --git a/ipc/syscall.c b/ipc/syscall.c index 3763b4293b74..84d6a7691baa 100644 --- 
a/ipc/syscall.c +++ b/ipc/syscall.c @@ -7,6 +7,9 @@ */ #include <linux/unistd.h> #include <linux/syscalls.h> +#include <linux/security.h> +#include <linux/ipc_namespace.h> +#include "util.h" #ifdef __ARCH_WANT_SYS_IPC #include <linux/errno.h> @@ -24,12 +27,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, switch (call) { case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, NULL); + return ksys_semtimedop(first, (struct sembuf __user *)ptr, + second, NULL); case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, - (const struct timespec __user *)fifth); + return ksys_semtimedop(first, (struct sembuf __user *)ptr, + second, + (const struct timespec __user *)fifth); case SEMGET: return sys_semget(first, second, third); @@ -124,9 +127,9 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, switch (call) { case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), second, NULL); + return ksys_semtimedop(first, compat_ptr(ptr), second, NULL); case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), second, + return compat_ksys_semtimedop(first, compat_ptr(ptr), second, compat_ptr(fifth)); case SEMGET: return sys_semget(first, second, third); diff --git a/ipc/util.h b/ipc/util.h index 89b8ec176fc4..6deadf77547e 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -235,4 +235,17 @@ static inline int compat_ipc_parse_version(int *cmd) #endif } #endif + +/* for __ARCH_WANT_SYS_IPC */ +long ksys_semtimedop(int semid, struct sembuf __user *tsops, + unsigned int nsops, + const struct timespec __user *timeout); + +/* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ +#ifdef CONFIG_COMPAT +long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, + unsigned int nsops, + const struct compat_timespec __user *timeout); +#endif /* CONFIG_COMPAT */ + #endif -- cgit v1.2.3 From 69894718a515fef7ff633cf354fcd7ed73a88891 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:53:58 +0100 Subject: ipc:
add semget syscall wrapper Provide ksys_semget() wrapper to avoid in-kernel calls to this syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_semget(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/sem.c | 7 ++++++- ipc/syscall.c | 4 ++-- ipc/util.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index e21ceb8b4af1..2e5f7ec7a7db 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -556,7 +556,7 @@ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, return 0; } -SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) +long ksys_semget(key_t key, int nsems, int semflg) { struct ipc_namespace *ns; static const struct ipc_ops sem_ops = { @@ -578,6 +578,11 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } +SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) +{ + return ksys_semget(key, nsems, semflg); +} + /** * perform_atomic_semop[_slow] - Attempt to perform semaphore * operations on a given array. 
diff --git a/ipc/syscall.c b/ipc/syscall.c index 84d6a7691baa..21fcdf0b4836 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -35,7 +35,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, (const struct timespec __user *)fifth); case SEMGET: - return sys_semget(first, second, third); + return ksys_semget(first, second, third); case SEMCTL: { unsigned long arg; if (!ptr) @@ -132,7 +132,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return compat_ksys_semtimedop(first, compat_ptr(ptr), second, compat_ptr(fifth)); case SEMGET: - return sys_semget(first, second, third); + return ksys_semget(first, second, third); case SEMCTL: if (!ptr) return -EINVAL; diff --git a/ipc/util.h b/ipc/util.h index 6deadf77547e..0f07056e5a73 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -240,6 +240,7 @@ static inline int compat_ipc_parse_version(int *cmd) long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct timespec __user *timeout); +long ksys_semget(key_t key, int nsems, int semflg); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From d969c6fa7263c8fc1928f528bb68587872350b6c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:00:39 +0100 Subject: ipc: add semctl syscall/compat_syscall wrappers Provide ksys_semctl() and compat_ksys_semctl() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_semctl() and compat_sys_semctl(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/sem.c | 14 ++++++++++++-- ipc/syscall.c | 4 ++-- ipc/util.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index 2e5f7ec7a7db..1cf56279a84c 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1581,7 +1581,7 @@ out_up: return err; } -SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) +long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) { int version; struct ipc_namespace *ns; @@ -1635,6 +1635,11 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) } } +SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) +{ + return ksys_semctl(semid, semnum, cmd, arg); +} + #ifdef CONFIG_COMPAT struct compat_semid_ds { @@ -1683,7 +1688,7 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, } } -COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) +long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) { void __user *p = compat_ptr(arg); struct ipc_namespace *ns; @@ -1727,6 +1732,11 @@ COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) return -EINVAL; } } + +COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) +{ + return compat_ksys_semctl(semid, semnum, cmd, arg); +} #endif /* If the task doesn't already have a undo_list, then allocate one diff --git a/ipc/syscall.c b/ipc/syscall.c index 21fcdf0b4836..a536cca37661 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -42,7 +42,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, return -EINVAL; if (get_user(arg, (unsigned long __user *) ptr)) return -EFAULT; - return sys_semctl(first, second, third, arg); + return ksys_semctl(first, second, third, arg); } case MSGSND: @@ -138,7 +138,7 @@ 
COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return -EINVAL; if (get_user(pad, (u32 __user *) compat_ptr(ptr))) return -EFAULT; - return compat_sys_semctl(first, second, third, pad); + return compat_ksys_semctl(first, second, third, pad); case MSGSND: return compat_sys_msgsnd(first, ptr, second, third); diff --git a/ipc/util.h b/ipc/util.h index 0f07056e5a73..1f1109b83437 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -241,12 +241,14 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct timespec __user *timeout); long ksys_semget(key_t key, int nsems, int semflg); +long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct compat_timespec __user *timeout); +long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); #endif /* CONFIG_COMPAT */ #endif -- cgit v1.2.3 From 3d65661a494a11266500c2532b4f163537c379db Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:06:04 +0100 Subject: ipc: add msgget syscall wrapper Provide ksys_msgget() wrapper to avoid in-kernel calls to this syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_msgget(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/msg.c | 7 ++++++- ipc/syscall.c | 4 ++-- ipc/util.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 0dcc6699dc53..64e8276be164 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -263,7 +263,7 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) return security_msg_queue_associate(msq, msgflg); } -SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) +long ksys_msgget(key_t key, int msgflg) { struct ipc_namespace *ns; static const struct ipc_ops msg_ops = { @@ -280,6 +280,11 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); } +SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) +{ + return ksys_msgget(key, msgflg); +} + static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) { diff --git a/ipc/syscall.c b/ipc/syscall.c index a536cca37661..355c4a644bbf 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -68,7 +68,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, second, fifth, third); } case MSGGET: - return sys_msgget((key_t) first, second); + return ksys_msgget((key_t) first, second); case MSGCTL: return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); @@ -161,7 +161,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return compat_sys_msgrcv(first, ptr, second, fifth, third); } case MSGGET: - return sys_msgget(first, second); + return ksys_msgget(first, second); case MSGCTL: return compat_sys_msgctl(first, second, compat_ptr(ptr)); diff --git a/ipc/util.h b/ipc/util.h index 1f1109b83437..b35c0dfe3bc3 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -242,6 +242,7 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, const struct timespec __user *timeout); long ksys_semget(key_t key, int nsems, 
int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); +long ksys_msgget(key_t key, int msgflg); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 65749e0bb5e7de876ee43d3f601e32afe17e9248 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:07:53 +0100 Subject: ipc: add shmget syscall wrapper Provide ksys_shmget() wrapper to avoid in-kernel calls to this syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_shmget(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/shm.c | 7 ++++++- ipc/syscall.c | 4 ++-- ipc/util.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 4643865e9171..9f3cdb259a51 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -656,7 +656,7 @@ static inline int shm_more_checks(struct kern_ipc_perm *ipcp, return 0; } -SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) +long ksys_shmget(key_t key, size_t size, int shmflg) { struct ipc_namespace *ns; static const struct ipc_ops shm_ops = { @@ -675,6 +675,11 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); } +SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) +{ + return ksys_shmget(key, size, shmflg); +} + static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) { switch (version) { diff --git a/ipc/syscall.c b/ipc/syscall.c index 355c4a644bbf..60bceb19b6f0 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -92,7 +92,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, case 
SHMDT: return sys_shmdt((char __user *)ptr); case SHMGET: - return sys_shmget(first, second, third); + return ksys_shmget(first, second, third); case SHMCTL: return sys_shmctl(first, second, (struct shmid_ds __user *) ptr); @@ -180,7 +180,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, case SHMDT: return sys_shmdt(compat_ptr(ptr)); case SHMGET: - return sys_shmget(first, (unsigned)second, third); + return ksys_shmget(first, (unsigned int)second, third); case SHMCTL: return compat_sys_shmctl(first, second, compat_ptr(ptr)); } diff --git a/ipc/util.h b/ipc/util.h index b35c0dfe3bc3..51002c0b2a21 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -243,6 +243,7 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, long ksys_semget(key_t key, int nsems, int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); +long ksys_shmget(key_t key, size_t size, int shmflg); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From da1e2744341542e404c172bcf6a321f509408b14 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:09:48 +0100 Subject: ipc: add shmdt syscall wrapper Provide ksys_shmdt() wrapper to avoid in-kernel calls to this syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_shmdt(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/shm.c | 7 ++++++- ipc/syscall.c | 4 ++-- ipc/util.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 9f3cdb259a51..e5838e3328dc 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1481,7 +1481,7 @@ COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) * detach and kill segment if marked destroyed. * The work is done in shm_close. */ -SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) +long ksys_shmdt(char __user *shmaddr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -1588,6 +1588,11 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) return retval; } +SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) +{ + return ksys_shmdt(shmaddr); +} + #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { diff --git a/ipc/syscall.c b/ipc/syscall.c index 60bceb19b6f0..b3aa71564815 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -90,7 +90,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, return -EINVAL; } case SHMDT: - return sys_shmdt((char __user *)ptr); + return ksys_shmdt((char __user *)ptr); case SHMGET: return ksys_shmget(first, second, third); case SHMCTL: @@ -178,7 +178,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return put_user(raddr, (compat_ulong_t __user *)compat_ptr(third)); } case SHMDT: - return sys_shmdt(compat_ptr(ptr)); + return ksys_shmdt(compat_ptr(ptr)); case SHMGET: return ksys_shmget(first, (unsigned int)second, third); case SHMCTL: diff --git a/ipc/util.h b/ipc/util.h index 51002c0b2a21..7770bcad1168 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -244,6 +244,7 @@ long ksys_semget(key_t key, int nsems, int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); long 
ksys_shmget(key_t key, size_t size, int shmflg); +long ksys_shmdt(char __user *shmaddr); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From c84d0791dfa7fe8f051082c09a558eb3e2d01931 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:12:33 +0100 Subject: ipc: add shmctl syscall/compat_syscall wrappers Provide ksys_shmctl() and compat_ksys_shmctl() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_shmctl() and compat_sys_shmctl(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/shm.c | 14 ++++++++++++-- ipc/syscall.c | 4 ++-- ipc/util.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index e5838e3328dc..0aae3e55bc56 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1045,7 +1045,7 @@ out_unlock1: return err; } -SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) { int err, version; struct ipc_namespace *ns; @@ -1099,6 +1099,11 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) } } +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + return ksys_shmctl(shmid, cmd, buf); +} + #ifdef CONFIG_COMPAT struct compat_shmid_ds { @@ -1218,7 +1223,7 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, } } -COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) +long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) { struct ipc_namespace *ns; struct shmid64_ds sem64; @@ -1273,6 
+1278,11 @@ COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) } return err; } + +COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) +{ + return compat_ksys_shmctl(shmid, cmd, uptr); +} #endif /* diff --git a/ipc/syscall.c b/ipc/syscall.c index b3aa71564815..34bbabc9e672 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -94,7 +94,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, case SHMGET: return ksys_shmget(first, second, third); case SHMCTL: - return sys_shmctl(first, second, + return ksys_shmctl(first, second, (struct shmid_ds __user *) ptr); default: return -ENOSYS; @@ -182,7 +182,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, case SHMGET: return ksys_shmget(first, (unsigned int)second, third); case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); + return compat_ksys_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff --git a/ipc/util.h b/ipc/util.h index 7770bcad1168..16e8b5b8c416 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -245,6 +245,7 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); +long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ #ifdef CONFIG_COMPAT @@ -252,6 +253,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct compat_timespec __user *timeout); long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); +long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ #endif -- cgit v1.2.3 From e340db56483b6e10bd5e5f281071876808801a41 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 20:15:28 +0100 Subject: ipc: add msgctl syscall/compat_syscall wrappers Provide ksys_msgctl() and compat_ksys_msgctl() 
wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_msgctl() and compat_sys_msgctl(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/msg.c | 14 ++++++++++++-- ipc/syscall.c | 5 +++-- ipc/util.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 64e8276be164..5b026868df07 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -538,7 +538,7 @@ out_unlock: return err; } -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) { int version; struct ipc_namespace *ns; @@ -581,6 +581,11 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) } } +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + return ksys_msgctl(msqid, cmd, buf); +} + #ifdef CONFIG_COMPAT struct compat_msqid_ds { @@ -651,7 +656,7 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in, } } -COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) +long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) { struct ipc_namespace *ns; int err; @@ -692,6 +697,11 @@ COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) return -EINVAL; } } + +COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) +{ + return compat_ksys_msgctl(msqid, cmd, uptr); +} #endif static int testmsg(struct msg_msg *msg, long type, int mode) diff --git a/ipc/syscall.c b/ipc/syscall.c index 34bbabc9e672..aa29b0802e26 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -70,7 +70,8 
@@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, case MSGGET: return ksys_msgget((key_t) first, second); case MSGCTL: - return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); + return ksys_msgctl(first, second, + (struct msqid_ds __user *)ptr); case SHMAT: switch (version) { @@ -163,7 +164,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, case MSGGET: return ksys_msgget(first, second); case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); + return compat_ksys_msgctl(first, second, compat_ptr(ptr)); case SHMAT: { int err; diff --git a/ipc/util.h b/ipc/util.h index 16e8b5b8c416..47837b4af3f2 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -243,6 +243,7 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, long ksys_semget(key_t key, int nsems, int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); +long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); @@ -253,6 +254,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct compat_timespec __user *timeout); long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); +long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ -- cgit v1.2.3 From 078faac9e8b6c8124bc012bbf97cca59caf6d4ea Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 21:25:57 +0100 Subject: ipc: add msgrcv syscall/compat_syscall wrappers Provide ksys_msgrcv() and compat_ksys_msgrcv() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. 
In particular, they use the same calling convention as sys_msgrcv() and compat_sys_msgrcv(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/msg.c | 19 ++++++++++++++++--- ipc/syscall.c | 8 ++++---- ipc/util.h | 4 ++++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 5b026868df07..abc5826270a6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -1150,10 +1150,16 @@ out_unlock1: return bufsz; } +long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, + long msgtyp, int msgflg) +{ + return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); +} + SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, long, msgtyp, int, msgflg) { - return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); + return ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg); } #ifdef CONFIG_COMPAT @@ -1171,12 +1177,19 @@ static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bu return msgsz; } -COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, - compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg) +long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, + compat_long_t msgtyp, int msgflg) { return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp, msgflg, compat_do_msg_fill); } + +COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, + compat_ssize_t, msgsz, compat_long_t, msgtyp, + int, msgflg) +{ + return compat_ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg); +} #endif int msg_init_ns(struct ipc_namespace *ns) diff --git a/ipc/syscall.c b/ipc/syscall.c index aa29b0802e26..0228c7afd882 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -59,11 +59,11 @@ SYSCALL_DEFINE6(ipc, unsigned 
int, call, int, first, unsigned long, second, (struct ipc_kludge __user *) ptr, sizeof(tmp))) return -EFAULT; - return sys_msgrcv(first, tmp.msgp, second, + return ksys_msgrcv(first, tmp.msgp, second, tmp.msgtyp, third); } default: - return sys_msgrcv(first, + return ksys_msgrcv(first, (struct msgbuf __user *) ptr, second, fifth, third); } @@ -156,10 +156,10 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return -EINVAL; if (copy_from_user(&ipck, uptr, sizeof(ipck))) return -EFAULT; - return compat_sys_msgrcv(first, ipck.msgp, second, + return compat_ksys_msgrcv(first, ipck.msgp, second, ipck.msgtyp, third); } - return compat_sys_msgrcv(first, ptr, second, fifth, third); + return compat_ksys_msgrcv(first, ptr, second, fifth, third); } case MSGGET: return ksys_msgget(first, second); diff --git a/ipc/util.h b/ipc/util.h index 47837b4af3f2..c16aceb1bdec 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -244,6 +244,8 @@ long ksys_semget(key_t key, int nsems, int semflg); long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, + long msgtyp, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); @@ -255,6 +257,8 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, const struct compat_timespec __user *timeout); long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); +long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, + compat_long_t msgtyp, int msgflg); long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ -- cgit v1.2.3 From 31c213f2106b7ea06f7fdc94ef8b785ed5342cf7 Mon Sep 17 00:00:00 2001 From: 
Dominik Brodowski Date: Tue, 20 Mar 2018 21:29:00 +0100 Subject: ipc: add msgsnd syscall/compat_syscall wrappers Provide ksys_msgsnd() and compat_ksys_msgsnd() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_msgsnd() and compat_sys_msgsnd(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- ipc/msg.c | 20 ++++++++++++++++---- ipc/syscall.c | 4 ++-- ipc/util.h | 4 ++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index abc5826270a6..9de48065c1ac 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -867,8 +867,8 @@ out_unlock1: return err; } -SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, - int, msgflg) +long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, + int msgflg) { long mtype; @@ -877,6 +877,12 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); } +SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, + int, msgflg) +{ + return ksys_msgsnd(msqid, msgp, msgsz, msgflg); +} + #ifdef CONFIG_COMPAT struct compat_msgbuf { @@ -884,8 +890,8 @@ struct compat_msgbuf { char mtext[1]; }; -COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, - compat_ssize_t, msgsz, int, msgflg) +long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, int msgflg) { struct compat_msgbuf __user *up = compat_ptr(msgp); compat_long_t mtype; @@ -894,6 +900,12 @@ COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, return -EFAULT; return do_msgsnd(msqid, 
mtype, up->mtext, (ssize_t)msgsz, msgflg); } + +COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, + compat_ssize_t, msgsz, int, msgflg) +{ + return compat_ksys_msgsnd(msqid, msgp, msgsz, msgflg); +} #endif static inline int convert_mode(long *msgtyp, int msgflg) diff --git a/ipc/syscall.c b/ipc/syscall.c index 0228c7afd882..77a883ef2eca 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -46,7 +46,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, } case MSGSND: - return sys_msgsnd(first, (struct msgbuf __user *) ptr, + return ksys_msgsnd(first, (struct msgbuf __user *) ptr, second, third); case MSGRCV: switch (version) { @@ -142,7 +142,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return compat_ksys_semctl(first, second, third, pad); case MSGSND: - return compat_sys_msgsnd(first, ptr, second, third); + return compat_ksys_msgsnd(first, ptr, second, third); case MSGRCV: { void __user *uptr = compat_ptr(ptr); diff --git a/ipc/util.h b/ipc/util.h index c16aceb1bdec..51853dc2f340 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -246,6 +246,8 @@ long ksys_msgget(key_t key, int msgflg); long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); +long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, + int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); @@ -259,6 +261,8 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); +long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, int msgflg); long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); #endif /* 
CONFIG_COMPAT */ -- cgit v1.2.3 From 192c58073d9abb1c507c89f109da5dc9f130ba70 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:27 +0100 Subject: kernel: add do_getpgid() helper; remove internal call to sys_getpgid() Using the do_getpgid() helper removes an in-kernel call to the sys_getpgid() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Signed-off-by: Dominik Brodowski --- kernel/sys.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index f2289de20e19..ebb138b841c8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1027,7 +1027,7 @@ out: return err; } -SYSCALL_DEFINE1(getpgid, pid_t, pid) +static int do_getpgid(pid_t pid) { struct task_struct *p; struct pid *grp; @@ -1055,11 +1055,16 @@ out: return retval; } +SYSCALL_DEFINE1(getpgid, pid_t, pid) +{ + return do_getpgid(pid); +} + #ifdef __ARCH_WANT_SYS_GETPGRP SYSCALL_DEFINE0(getpgrp) { - return sys_getpgid(0); + return do_getpgid(0); } #endif -- cgit v1.2.3 From 6203deb0a76f35b6dc476fecb228c850099a1bc4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 17:11:51 +0100 Subject: kernel: add do_compat_sigaltstack() helper; remove in-kernel call to compat syscall Using this helper allows us to avoid the in-kernel call to the compat_sys_sigaltstack() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: "Eric W. 
Biederman" Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- kernel/signal.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 985c61749bcf..f04466655238 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3573,9 +3573,8 @@ int __save_altstack(stack_t __user *uss, unsigned long sp) } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(sigaltstack, - const compat_stack_t __user *, uss_ptr, - compat_stack_t __user *, uoss_ptr) +static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr) { stack_t uss, uoss; int ret; @@ -3602,9 +3601,16 @@ COMPAT_SYSCALL_DEFINE2(sigaltstack, return ret; } +COMPAT_SYSCALL_DEFINE2(sigaltstack, + const compat_stack_t __user *, uss_ptr, + compat_stack_t __user *, uoss_ptr) +{ + return do_compat_sigaltstack(uss_ptr, uoss_ptr); +} + int compat_restore_altstack(const compat_stack_t __user *uss) { - int err = compat_sys_sigaltstack(uss, NULL); + int err = do_compat_sigaltstack(uss, NULL); /* squash all but -EFAULT for now */ return err == -EFAULT ? err : 0; } -- cgit v1.2.3 From e530dca584a9aa4aedf26adf0ed3c1c9b80e2767 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 18:09:27 +0100 Subject: kernel: provide __sys_*() wrappers for syscalls called by kernel/uid16.c Using these helpers allows us to avoid the in-kernel calls to these syscalls: sys_setregid(), sys_setgid(), sys_setreuid(), sys_setuid(), sys_setresuid(), sys_setresgid(), sys_setfsuid(), and sys_setfsgid(). The __sys_ prefix denotes that these functions are meant as drop-in replacements for the syscalls. In particular, they use the same calling convention. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Eric W. 
Biederman Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- kernel/sys.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- kernel/uid16.c | 19 ++++++++++--------- kernel/uid16.h | 14 ++++++++++++++ 3 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 kernel/uid16.h diff --git a/kernel/sys.c b/kernel/sys.c index ebb138b841c8..550f47788ae4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -69,6 +69,8 @@ #include #include +#include "uid16.h" + #ifndef SET_UNALIGN_CTL # define SET_UNALIGN_CTL(a, b) (-EINVAL) #endif @@ -340,7 +342,7 @@ out_unlock: * operations (as far as semantic preservation is concerned). */ #ifdef CONFIG_MULTIUSER -SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) +long __sys_setregid(gid_t rgid, gid_t egid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -392,12 +394,17 @@ error: return retval; } +SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) +{ + return __sys_setregid(rgid, egid); +} + /* * setgid() is implemented like SysV w/ SAVED_IDS * * SMP: Same implicit races as above. */ -SYSCALL_DEFINE1(setgid, gid_t, gid) +long __sys_setgid(gid_t gid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -429,6 +436,11 @@ error: return retval; } +SYSCALL_DEFINE1(setgid, gid_t, gid) +{ + return __sys_setgid(gid); +} + /* * change the user struct in a credentials set to match the new UID */ @@ -473,7 +485,7 @@ static int set_user(struct cred *new) * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. 
*/ -SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) +long __sys_setreuid(uid_t ruid, uid_t euid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -533,6 +545,11 @@ error: return retval; } +SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) +{ + return __sys_setreuid(ruid, euid); +} + /* * setuid() is implemented like SysV with SAVED_IDS * @@ -544,7 +561,7 @@ error: * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ -SYSCALL_DEFINE1(setuid, uid_t, uid) +long __sys_setuid(uid_t uid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -586,12 +603,17 @@ error: return retval; } +SYSCALL_DEFINE1(setuid, uid_t, uid) +{ + return __sys_setuid(uid); +} + /* * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ -SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) +long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -656,6 +678,11 @@ error: return retval; } +SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) +{ + return __sys_setresuid(ruid, euid, suid); +} + SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) { const struct cred *cred = current_cred(); @@ -678,7 +705,7 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t _ /* * Same as above, but for rgid, egid, sgid. 
*/ -SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) +long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; @@ -730,6 +757,11 @@ error: return retval; } +SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) +{ + return __sys_setresgid(rgid, egid, sgid); +} + SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) { const struct cred *cred = current_cred(); @@ -757,7 +789,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t _ * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ -SYSCALL_DEFINE1(setfsuid, uid_t, uid) +long __sys_setfsuid(uid_t uid) { const struct cred *old; struct cred *new; @@ -793,10 +825,15 @@ change_okay: return old_fsuid; } +SYSCALL_DEFINE1(setfsuid, uid_t, uid) +{ + return __sys_setfsuid(uid); +} + /* * Samma på svenska.. */ -SYSCALL_DEFINE1(setfsgid, gid_t, gid) +long __sys_setfsgid(gid_t gid) { const struct cred *old; struct cred *new; @@ -830,6 +867,11 @@ change_okay: commit_creds(new); return old_fsgid; } + +SYSCALL_DEFINE1(setfsgid, gid_t, gid) +{ + return __sys_setfsgid(gid); +} #endif /* CONFIG_MULTIUSER */ /** diff --git a/kernel/uid16.c b/kernel/uid16.c index ef1da2a5f9bd..7b930edfe461 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -18,6 +18,8 @@ #include +#include "uid16.h" + SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { return sys_chown(filename, low2highuid(user), low2highgid(group)); @@ -35,27 +37,27 @@ SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) { - return sys_setregid(low2highgid(rgid), low2highgid(egid)); + return __sys_setregid(low2highgid(rgid), low2highgid(egid)); } SYSCALL_DEFINE1(setgid16, old_gid_t, gid) { - return 
sys_setgid(low2highgid(gid)); + return __sys_setgid(low2highgid(gid)); } SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) { - return sys_setreuid(low2highuid(ruid), low2highuid(euid)); + return __sys_setreuid(low2highuid(ruid), low2highuid(euid)); } SYSCALL_DEFINE1(setuid16, old_uid_t, uid) { - return sys_setuid(low2highuid(uid)); + return __sys_setuid(low2highuid(uid)); } SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) { - return sys_setresuid(low2highuid(ruid), low2highuid(euid), + return __sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); } @@ -78,11 +80,10 @@ SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euid SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) { - return sys_setresgid(low2highgid(rgid), low2highgid(egid), + return __sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); } - SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgidp, old_gid_t __user *, egidp, old_gid_t __user *, sgidp) { const struct cred *cred = current_cred(); @@ -102,12 +103,12 @@ SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgidp, old_gid_t __user *, egid SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) { - return sys_setfsuid(low2highuid(uid)); + return __sys_setfsuid(low2highuid(uid)); } SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) { - return sys_setfsgid(low2highgid(gid)); + return __sys_setfsgid(low2highgid(gid)); } static int groups16_to_user(old_gid_t __user *grouplist, diff --git a/kernel/uid16.h b/kernel/uid16.h new file mode 100644 index 000000000000..cdca040f7602 --- /dev/null +++ b/kernel/uid16.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_UID16_H +#define LINUX_UID16_H + +long __sys_setuid(uid_t uid); +long __sys_setgid(gid_t gid); +long __sys_setreuid(uid_t ruid, uid_t euid); +long __sys_setregid(gid_t rgid, gid_t egid); +long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); +long 
__sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); +long __sys_setfsuid(uid_t uid); +long __sys_setfsgid(gid_t gid); + +#endif /* LINUX_UID16_H */ -- cgit v1.2.3 From 7d4dd4f159b94003655b1688d9a4c0e2b6268ff8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Mar 2018 22:40:35 +0100 Subject: sched: add do_sched_yield() helper; remove in-kernel call to sched_yield() Using the sched-internal do_sched_yield() helper allows us to get rid of the sched-internal call to the sys_sched_yield() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Dominik Brodowski --- kernel/sched/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e7c535eee0a6..8de4919c889a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4892,7 +4892,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, * * Return: 0. */ -SYSCALL_DEFINE0(sched_yield) +static void do_sched_yield(void) { struct rq_flags rf; struct rq *rq; @@ -4913,7 +4913,11 @@ SYSCALL_DEFINE0(sched_yield) sched_preempt_enable_no_resched(); schedule(); +} +SYSCALL_DEFINE0(sched_yield) +{ + do_sched_yield(); return 0; } @@ -4997,7 +5001,7 @@ EXPORT_SYMBOL(__cond_resched_softirq); void __sched yield(void) { set_current_state(TASK_RUNNING); - sys_sched_yield(); + do_sched_yield(); } EXPORT_SYMBOL(yield); -- cgit v1.2.3 From b6e9b0babb7a02ae4f00f053974609000f00950e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:00:25 +0100 Subject: mm: add kernel_migrate_pages() helper, move compat syscall to mm/mempolicy.c Move compat_sys_migrate_pages() to mm/mempolicy.c and make it call a newly introduced helper -- kernel_migrate_pages() -- instead of the syscall. 
This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: linux-mm@kvack.org Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- kernel/compat.c | 33 --------------------------------- mm/mempolicy.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/kernel/compat.c b/kernel/compat.c index 3f5fa8902e7d..51bdf1808943 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -508,39 +508,6 @@ COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, } return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); } - -COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid, - compat_ulong_t, maxnode, - const compat_ulong_t __user *, old_nodes, - const compat_ulong_t __user *, new_nodes) -{ - unsigned long __user *old = NULL; - unsigned long __user *new = NULL; - nodemask_t tmp_mask; - unsigned long nr_bits; - unsigned long size; - - nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); - size = ALIGN(nr_bits, BITS_PER_LONG) / 8; - if (old_nodes) { - if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) - return -EFAULT; - old = compat_alloc_user_space(new_nodes ? 
size * 2 : size); - if (new_nodes) - new = old + size / sizeof(unsigned long); - if (copy_to_user(old, nodes_addr(tmp_mask), size)) - return -EFAULT; - } - if (new_nodes) { - if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) - return -EFAULT; - if (new == NULL) - new = compat_alloc_user_space(size); - if (copy_to_user(new, nodes_addr(tmp_mask), size)) - return -EFAULT; - } - return sys_migrate_pages(pid, nr_bits + 1, old, new); -} #endif /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d879f1d8a44a..7399ede02b5f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1377,9 +1377,9 @@ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, return do_set_mempolicy(mode, flags, &nodes); } -SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, - const unsigned long __user *, old_nodes, - const unsigned long __user *, new_nodes) +static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *old_nodes, + const unsigned long __user *new_nodes) { struct mm_struct *mm = NULL; struct task_struct *task; @@ -1469,6 +1469,13 @@ out_put: } +SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, + const unsigned long __user *, old_nodes, + const unsigned long __user *, new_nodes) +{ + return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes); +} + /* Retrieve NUMA policy */ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, @@ -1571,7 +1578,40 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, return sys_mbind(start, len, mode, nm, nr_bits+1, flags); } -#endif +COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid, + compat_ulong_t, maxnode, + const compat_ulong_t __user *, old_nodes, + const compat_ulong_t __user *, new_nodes) +{ + unsigned long __user *old = NULL; + unsigned long __user *new = NULL; + nodemask_t tmp_mask; + unsigned long nr_bits; + unsigned long size; + + nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); + size = 
ALIGN(nr_bits, BITS_PER_LONG) / 8; + if (old_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) + return -EFAULT; + old = compat_alloc_user_space(new_nodes ? size * 2 : size); + if (new_nodes) + new = old + size / sizeof(unsigned long); + if (copy_to_user(old, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + if (new_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) + return -EFAULT; + if (new == NULL) + new = compat_alloc_user_space(size); + if (copy_to_user(new, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + return kernel_migrate_pages(pid, nr_bits + 1, old, new); +} + +#endif /* CONFIG_COMPAT */ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr) -- cgit v1.2.3 From 7addf44388255f6fa99c83e3e2ad79cef0813698 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:08:03 +0100 Subject: mm: add kernel_move_pages() helper, move compat syscall to mm/migrate.c Move compat_sys_move_pages() to mm/migrate.c and make it call a newly introduced helper -- kernel_move_pages() -- instead of the syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: linux-mm@kvack.org Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- kernel/compat.c | 22 ---------------------- mm/migrate.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/kernel/compat.c b/kernel/compat.c index 51bdf1808943..6d21894806b4 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -488,28 +488,6 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat) } EXPORT_SYMBOL_GPL(get_compat_sigset); -#ifdef CONFIG_NUMA -COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, - compat_uptr_t __user *, pages32, - const int __user *, nodes, - int __user *, status, - int, flags) -{ - const void __user * __user *pages; - int i; - - pages = compat_alloc_user_space(nr_pages * sizeof(void *)); - for (i = 0; i < nr_pages; i++) { - compat_uptr_t p; - - if (get_user(p, pages32 + i) || - put_user(compat_ptr(p), pages + i)) - return -EFAULT; - } - return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); -} -#endif - /* * Allocate user-space memory for the duration of a single system call, * in order to marshall parameters inside a compat thunk. diff --git a/mm/migrate.c b/mm/migrate.c index 1e5525a25691..003886606a22 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1745,10 +1746,10 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, * Move a list of pages in the address space of the currently executing * process. 
*/ -SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, - const void __user * __user *, pages, - const int __user *, nodes, - int __user *, status, int, flags) +static int kernel_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, int flags) { struct task_struct *task; struct mm_struct *mm; @@ -1807,6 +1808,36 @@ out: return err; } +SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, + const void __user * __user *, pages, + const int __user *, nodes, + int __user *, status, int, flags) +{ + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, + compat_uptr_t __user *, pages32, + const int __user *, nodes, + int __user *, status, + int, flags) +{ + const void __user * __user *pages; + int i; + + pages = compat_alloc_user_space(nr_pages * sizeof(void *)); + for (i = 0; i < nr_pages; i++) { + compat_uptr_t p; + + if (get_user(p, pages32 + i) || + put_user(compat_ptr(p), pages + i)) + return -EFAULT; + } + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} +#endif /* CONFIG_COMPAT */ + #ifdef CONFIG_NUMA_BALANCING /* * Returns true if this is a safe migration target node for misplaced NUMA -- cgit v1.2.3 From e7dc9ad6e98eb8cc49b454d54e361f91aebc395f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:12:22 +0100 Subject: mm: add kernel_mbind() helper; remove in-kernel call to syscall Using the mm-internal kernel_mbind() helper allows us to get rid of the mm-internal call to the sys_mbind() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: linux-mm@kvack.org Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- mm/mempolicy.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7399ede02b5f..e4d7d4c0b253 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1336,9 +1336,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; } -SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, - unsigned long, mode, const unsigned long __user *, nmask, - unsigned long, maxnode, unsigned, flags) +static long kernel_mbind(unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, unsigned int flags) { nodemask_t nodes; int err; @@ -1357,6 +1357,13 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, return do_mbind(start, len, mode, mode_flags, &nodes, flags); } +SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, + unsigned long, mode, const unsigned long __user *, nmask, + unsigned long, maxnode, unsigned int, flags) +{ + return kernel_mbind(start, len, mode, nmask, maxnode, flags); +} + /* Set the process memory policy */ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, unsigned long, maxnode) @@ -1575,7 +1582,7 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, return -EFAULT; } - return sys_mbind(start, len, mode, nm, nr_bits+1, flags); + return kernel_mbind(start, len, mode, nm, nr_bits+1, flags); } COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid, -- cgit v1.2.3 From af03c4acb728dd9ed850d329a1cff71d52e7f3a8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:20:01 +0100 Subject: mm: add kernel_[sg]et_mempolicy() helpers; remove in-kernel calls to syscalls 
Using the mm-internal kernel_[sg]et_mempolicy() helper allows us to get rid of the mm-internal calls to the sys_[sg]et_mempolicy() syscalls. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: linux-mm@kvack.org Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- mm/mempolicy.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e4d7d4c0b253..ca817e768d0e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1365,8 +1365,8 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, } /* Set the process memory policy */ -SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, - unsigned long, maxnode) +static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, + unsigned long maxnode) { int err; nodemask_t nodes; @@ -1384,6 +1384,12 @@ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, return do_set_mempolicy(mode, flags, &nodes); } +SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, + unsigned long, maxnode) +{ + return kernel_set_mempolicy(mode, nmask, maxnode); +} + static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) @@ -1485,9 +1491,11 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, /* Retrieve NUMA policy */ -SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, - unsigned long __user *, nmask, unsigned long, maxnode, - unsigned long, addr, unsigned long, flags) +static int kernel_get_mempolicy(int __user *policy, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned long addr, + unsigned long flags) { int err; int uninitialized_var(pval); @@ -1510,6 +1518,13 @@ 
SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, return err; } +SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, + unsigned long __user *, nmask, unsigned long, maxnode, + unsigned long, addr, unsigned long, flags) +{ + return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, @@ -1528,7 +1543,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, if (nmask) nm = compat_alloc_user_space(alloc_size); - err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags); + err = kernel_get_mempolicy(policy, nm, nr_bits+1, addr, flags); if (!err && nmask) { unsigned long copy_size; @@ -1560,7 +1575,7 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, return -EFAULT; } - return sys_set_mempolicy(mode, nm, nr_bits+1); + return kernel_set_mempolicy(mode, nm, nr_bits+1); } COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, -- cgit v1.2.3 From 2dae0248061e6a8a2cccfb2ad01a76f674e40d72 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:27 +0100 Subject: fs: add do_readlinkat() helper; remove internal call to sys_readlinkat() Using the do_readlinkat() helper removes an in-kernel call to the sys_readlinkat() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/stat.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index 873785dae022..f8e6fb2c3657 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -379,8 +379,8 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) return error; } -SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, - char __user *, buf, int, bufsiz) +static int do_readlinkat(int dfd, const char __user *pathname, + char __user *buf, int bufsiz) { struct path path; int error; @@ -415,10 +415,16 @@ retry: return error; } +SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, + char __user *, buf, int, bufsiz) +{ + return do_readlinkat(dfd, pathname, buf, bufsiz); +} + SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, int, bufsiz) { - return sys_readlinkat(AT_FDCWD, path, buf, bufsiz); + return do_readlinkat(AT_FDCWD, path, buf, bufsiz); } -- cgit v1.2.3 From 0a216dd1cfe8a4966767e7e8c0bb90c3db163def Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:28 +0100 Subject: fs: add do_pipe2() helper; remove internal call to sys_pipe2() Using this helper removes an in-kernel call to the sys_pipe2() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/pipe.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 7b1954caf388..39d6f431da83 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -841,7 +841,7 @@ int do_pipe_flags(int *fd, int flags) * sys_pipe() is the normal C calling standard for creating * a pipe. 
It's not the way Unix traditionally does this, though. */ -SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) +static int do_pipe2(int __user *fildes, int flags) { struct file *files[2]; int fd[2]; @@ -863,9 +863,14 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) return error; } +SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) +{ + return do_pipe2(fildes, flags); +} + SYSCALL_DEFINE1(pipe, int __user *, fildes) { - return sys_pipe2(fildes, 0); + return do_pipe2(fildes, 0); } static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) -- cgit v1.2.3 From ee81feb64ead8e4bed0ebc94c980e6cd836aaafd Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:28 +0100 Subject: fs: add do_renameat2() helper; remove internal call to sys_renameat2() Using this helper removes in-kernel calls to the sys_renameat2() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/namei.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 921ae32dbc80..524e829ffc7d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4478,8 +4478,8 @@ out: } EXPORT_SYMBOL(vfs_rename); -SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, - int, newdfd, const char __user *, newname, unsigned int, flags) +static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, + const char __user *newname, unsigned int flags) { struct dentry *old_dentry, *new_dentry; struct dentry *trap; @@ -4621,15 +4621,21 @@ exit: return error; } +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, unsigned int, flags) +{ + return do_renameat2(olddfd, oldname, newdfd, newname, flags); +} + SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { - return sys_renameat2(olddfd, oldname, newdfd, newname, 0); + return do_renameat2(olddfd, oldname, newdfd, newname, 0); } SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { - return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } int vfs_whiteout(struct inode *dir, struct dentry *dentry) -- cgit v1.2.3 From f13903587c189fa73da47ddd416eb31704c48486 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:29 +0100 Subject: fs: add do_futimesat() helper; remove internal call to sys_futimesat() Using this helper removes the in-kernel call to the sys_futimesat() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/utimes.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index e4b3d7c2c9f5..5be035ed26c0 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -184,8 +184,8 @@ SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags); } -SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename, - struct timeval __user *, utimes) +static long do_futimesat(int dfd, const char __user *filename, + struct timeval __user *utimes) { struct timeval times[2]; struct timespec64 tstimes[2]; @@ -212,10 +212,17 @@ SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename, return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0); } + +SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename, + struct timeval __user *, utimes) +{ + return do_futimesat(dfd, filename, utimes); +} + SYSCALL_DEFINE2(utimes, char __user *, filename, struct timeval __user *, utimes) { - return sys_futimesat(AT_FDCWD, filename, utimes); + return do_futimesat(AT_FDCWD, filename, utimes); } #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 791eb22eef0d077df4ddcf633ee6eac038f0431e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:30 +0100 Subject: fs: add do_epoll_*() helpers; remove internal calls to sys_epoll_*() Using the helper functions do_epoll_create() and do_epoll_wait() allows us to remove in-kernel calls to the related syscall functions. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/eventpoll.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0f3494ed3ed0..602ca4285b2e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1936,7 +1936,7 @@ static void clear_tfile_check_list(void) /* * Open an eventpoll file descriptor. */ -SYSCALL_DEFINE1(epoll_create1, int, flags) +static int do_epoll_create(int flags) { int error, fd; struct eventpoll *ep = NULL; @@ -1979,12 +1979,17 @@ out_free_ep: return error; } +SYSCALL_DEFINE1(epoll_create1, int, flags) +{ + return do_epoll_create(flags); +} + SYSCALL_DEFINE1(epoll_create, int, size) { if (size <= 0) return -EINVAL; - return sys_epoll_create1(0); + return do_epoll_create(0); } /* @@ -2148,8 +2153,8 @@ error_return: * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). */ -SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout) +static int do_epoll_wait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout) { int error; struct fd f; @@ -2190,6 +2195,12 @@ error_fput: return error; } +SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout) +{ + return do_epoll_wait(epfd, events, maxevents, timeout); +} + /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). @@ -2214,7 +2225,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, set_current_blocked(&ksigmask); } - error = sys_epoll_wait(epfd, events, maxevents, timeout); + error = do_epoll_wait(epfd, events, maxevents, timeout); /* * If we changed the signal mask, we need to restore the original one. 
@@ -2257,7 +2268,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, set_current_blocked(&ksigmask); } - err = sys_epoll_wait(epfd, events, maxevents, timeout); + err = do_epoll_wait(epfd, events, maxevents, timeout); /* * If we changed the signal mask, we need to restore the original one. -- cgit v1.2.3 From 52fb6db0fd6f50ac71475f02b96098fbb1fb3417 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:36 +0100 Subject: fs: add do_signalfd4() helper; remove internal calls to sys_signalfd4() Using this helper removes in-kernel calls to the sys_signalfd4() syscall function. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/signalfd.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 76bf9cc62074..501c41f3351f 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -256,8 +256,8 @@ static const struct file_operations signalfd_fops = { .llseek = noop_llseek, }; -SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, - size_t, sizemask, int, flags) +static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, + int flags) { sigset_t sigmask; struct signalfd_ctx *ctx; @@ -310,10 +310,16 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, return ufd; } +SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, + size_t, sizemask, int, flags) +{ + return do_signalfd4(ufd, user_mask, sizemask, flags); +} + SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, size_t, sizemask) { - return sys_signalfd4(ufd, user_mask, sizemask, 0); + return do_signalfd4(ufd, user_mask, sizemask, 0); } #ifdef CONFIG_COMPAT @@ -333,7 +339,7 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, if (copy_to_user(ksigmask, 
&tmp, sizeof(sigset_t))) return -EFAULT; - return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); + return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); } COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, -- cgit v1.2.3 From 2fc96f8331ba7d08ddc126d154a1504b9a79b79e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:37 +0100 Subject: fs: add do_eventfd() helper; remove internal call to sys_eventfd2() Using this helper removes an in-kernel call to the sys_eventfd2() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/eventfd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/eventfd.c b/fs/eventfd.c index 012f5bd46dfa..08d3bd602f73 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -380,7 +380,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) } EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); -SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) +static int do_eventfd(unsigned int count, int flags) { struct eventfd_ctx *ctx; int fd; @@ -409,8 +409,13 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) return fd; } +SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) +{ + return do_eventfd(count, flags); +} + SYSCALL_DEFINE1(eventfd, unsigned int, count) { - return sys_eventfd2(count, 0); + return do_eventfd(count, 0); } -- cgit v1.2.3 From 98e5f7bd2c67f4028a0797393092f8730118d713 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 14:53:38 +0100 Subject: fs: add do_lookup_dcookie() helper; remove in-kernel call to syscall Using the fs-internal do_lookup_dcookie() helper allows us to get rid of fs-internal calls to the sys_lookup_dcookie() syscall. This patch is part of a series which removes in-kernel calls to syscalls.
On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/dcookies.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/dcookies.c b/fs/dcookies.c index 0d0461cf2431..57bc96435feb 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -146,7 +146,7 @@ out: /* And here is where the userspace process can look up the cookie value * to retrieve the path. */ -SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len) +static int do_lookup_dcookie(u64 cookie64, char __user *buf, size_t len) { unsigned long cookie = (unsigned long)cookie64; int err = -EINVAL; @@ -203,13 +203,18 @@ out: return err; } +SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len) +{ + return do_lookup_dcookie(cookie64, buf, len); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len) { #ifdef __BIG_ENDIAN - return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len); + return do_lookup_dcookie(((u64)w0 << 32) | w1, buf, len); #else - return sys_lookup_dcookie(((u64)w1 << 32) | w0, buf, len); + return do_lookup_dcookie(((u64)w1 << 32) | w0, buf, len); #endif } #endif -- cgit v1.2.3 From 30cfe4ef8b8948842a48de7955ca0522568eeb6d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 15:00:24 +0100 Subject: fs: add do_vmsplice() helper; remove in-kernel call to syscall Using the fs-internal do_vmsplice() helper allows us to get rid of the fs-internal call to the sys_vmsplice() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/splice.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 39e2dc01ac12..005d09cf3fa8 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1331,8 +1331,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, - unsigned long, nr_segs, unsigned int, flags) +static long do_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) { struct fd f; long error; @@ -1358,6 +1358,12 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, return error; } +SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, + unsigned long, nr_segs, unsigned int, flags) +{ + return do_vmsplice(fd, iov, nr_segs, flags); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, unsigned int, nr_segs, unsigned int, flags) @@ -1375,7 +1381,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io put_user(v.iov_len, &iov[i].iov_len)) return -EFAULT; } - return sys_vmsplice(fd, iov, nr_segs, flags); + return do_vmsplice(fd, iov, nr_segs, flags); } #endif -- cgit v1.2.3 From 4bdb9acabff2dfcb856c40b6936269d87f490c8d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Mar 2018 07:53:04 +0100 Subject: fs: add kern_select() helper; remove in-kernel call to sys_select() Using this helper allows us to avoid the in-kernel call to the sys_select() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined.
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/select.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/select.c b/fs/select.c index b6c36254028a..b5df01c4587d 100644 --- a/fs/select.c +++ b/fs/select.c @@ -675,8 +675,8 @@ out_nofds: return ret; } -SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, - fd_set __user *, exp, struct timeval __user *, tvp) +static int kern_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct timeval __user *tvp) { struct timespec64 end_time, *to = NULL; struct timeval tv; @@ -699,6 +699,12 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, return ret; } +SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval __user *, tvp) +{ + return kern_select(n, inp, outp, exp, tvp); +} + static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -784,7 +790,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); + return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp); } #endif -- cgit v1.2.3 From e02af2ff654806c5dc93412fffd77d67d1125ccc Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:29:53 +0100 Subject: fs: add do_compat_fcntl64() helper; remove in-kernel call to compat syscall Using the fs-internal do_compat_fcntl64() helper allows us to get rid of the fs-internal call to the compat_sys_fcntl64() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/fcntl.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 1e97f1fda90c..d737ff082472 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -607,8 +607,8 @@ static int fixup_compat_flock(struct flock *flock) return 0; } -COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, - compat_ulong_t, arg) +static long do_compat_fcntl64(unsigned int fd, unsigned int cmd, + compat_ulong_t arg) { struct fd f = fdget_raw(fd); struct flock flock; @@ -672,6 +672,12 @@ out_put: return err; } +COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) +{ + return do_compat_fcntl64(fd, cmd, arg); +} + COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { @@ -684,7 +690,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, case F_OFD_SETLKW: return -EINVAL; } - return compat_sys_fcntl64(fd, cmd, arg); + return do_compat_fcntl64(fd, cmd, arg); } #endif -- cgit v1.2.3 From 05585e449572d7bdb798a87a732f86760c6b3c77 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:33:48 +0100 Subject: fs: add do_compat_select() helper; remove in-kernel call to compat syscall Using the fs-internal do_compat_select() helper allows us to get rid of the fs-internal call to the compat_sys_select() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/select.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/select.c b/fs/select.c index b5df01c4587d..ba879c51288f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1265,9 +1265,9 @@ out_nofds: return ret; } -COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, - compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, - struct compat_timeval __user *, tvp) +static int do_compat_select(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timeval __user *tvp) { struct timespec64 end_time, *to = NULL; struct compat_timeval tv; @@ -1290,6 +1290,13 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, return ret; } +COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timeval __user *, tvp) +{ + return do_compat_select(n, inp, outp, exp, tvp); +} + struct compat_sel_arg_struct { compat_ulong_t n; compat_uptr_t inp; @@ -1304,8 +1311,8 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), - compat_ptr(a.exp), compat_ptr(a.tvp)); + return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); } static long do_compat_pselect(int n, compat_ulong_t __user *inp, -- cgit v1.2.3 From 570484bfe813fa67da003077898a7046767cb4d1 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:36:46 +0100 Subject: fs: add do_compat_signalfd4() helper; remove in-kernel call to compat syscall Using the fs-internal do_compat_signalfd4() helper allows us to get rid of the fs-internal call to the 
compat_sys_signalfd4() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/signalfd.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 501c41f3351f..d2187a813376 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -323,10 +323,9 @@ SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, - const compat_sigset_t __user *,sigmask, - compat_size_t, sigsetsize, - int, flags) +static long do_compat_signalfd4(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags) { sigset_t tmp; sigset_t __user *ksigmask; @@ -342,10 +341,18 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); } +COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize, + int, flags) +{ + return do_compat_signalfd4(ufd, sigmask, sigsetsize, flags); +} + COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, const compat_sigset_t __user *,sigmask, compat_size_t, sigsetsize) { - return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0); + return do_compat_signalfd4(ufd, sigmask, sigsetsize, 0); } #endif -- cgit v1.2.3 From ab641afa73fbcdee962c4ec9641dd561fdae9944 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Mar 2018 19:39:44 +0100 Subject: fs: add do_compat_futimesat() helper; remove in-kernel call to compat syscall Using the fs-internal do_compat_futimesat() helper allows us to get rid of the fs-internal call to the compat_sys_futimesat() syscall. This patch is part of a series which removes in-kernel calls to syscalls. 
On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/utimes.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index 5be035ed26c0..69d4b6ba1bfb 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -260,7 +260,8 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena return do_utimes(dfd, filename, t ? tv : NULL, flags); } -COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) +static long do_compat_futimesat(unsigned int dfd, const char __user *filename, + struct compat_timeval __user *t) { struct timespec64 tv[2]; @@ -279,8 +280,15 @@ COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filena return do_utimes(dfd, filename, t ? tv : NULL, 0); } +COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, + const char __user *, filename, + struct compat_timeval __user *, t) +{ + return do_compat_futimesat(dfd, filename, t); +} + COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) { - return compat_sys_futimesat(AT_FDCWD, filename, t); + return do_compat_futimesat(AT_FDCWD, filename, t); } #endif -- cgit v1.2.3 From d0d89d1ed3ff7e39d80773be09918037d06522fb Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:27:21 +0100 Subject: inotify: add do_inotify_init() helper; remove in-kernel call to syscall Using the inotify-internal do_inotify_init() helper allows us to get rid of the in-kernel call to sys_inotify_init1() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Acked-by: Jan Kara Cc: Amir Goldstein Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Dominik Brodowski --- fs/notify/inotify/inotify_user.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 2c908b31d6c9..43c23653ce2e 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -635,7 +635,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) /* inotify syscalls */ -SYSCALL_DEFINE1(inotify_init1, int, flags) +static int do_inotify_init(int flags) { struct fsnotify_group *group; int ret; @@ -660,9 +660,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) return ret; } +SYSCALL_DEFINE1(inotify_init1, int, flags) +{ + return do_inotify_init(flags); +} + SYSCALL_DEFINE0(inotify_init) { - return sys_inotify_init1(0); + return do_inotify_init(0); } SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, -- cgit v1.2.3 From 183caa3c8668e95c3647ac1e7e6b8876b7d9fbdb Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 15:06:11 +0100 Subject: fanotify: add do_fanotify_mark() helper; remove in-kernel call to syscall Using the fs-internal do_fanotify_mark() helper allows us to get rid of the fs-internal call to the sys_fanotify_mark() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Acked-by: Jan Kara Cc: Amir Goldstein Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Dominik Brodowski --- fs/notify/fanotify/fanotify_user.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c07eb3d655ea..fa803a58a605 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -820,9 +820,8 @@ out_destroy_group: return fd; } -SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, - __u64, mask, int, dfd, - const char __user *, pathname) +static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, + int dfd, const char __user *pathname) { struct inode *inode = NULL; struct vfsmount *mnt = NULL; @@ -928,13 +927,20 @@ fput_and_out: return ret; } +SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, + __u64, mask, int, dfd, + const char __user *, pathname) +{ + return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u32, mask0, __u32, mask1, int, dfd, const char __user *, pathname) { - return sys_fanotify_mark(fanotify_fd, flags, + return do_fanotify_mark(fanotify_fd, flags, #ifdef __BIG_ENDIAN ((__u64)mask0 << 32) | mask1, #else -- cgit v1.2.3 From cb0b476ab12ca3bd9dd9122047660f3a73e8d647 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:26:56 +0100 Subject: fs/quota: add kernel_quotactl() helper; remove in-kernel call to syscall Using the fs-internal kernel_quotactl() helper allows us to get rid of the fs-internal call to the sys_quotactl() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Acked-by: Jan Kara Signed-off-by: Dominik Brodowski --- fs/quota/compat.c | 8 ++++---- fs/quota/quota.c | 10 ++++++++-- include/linux/quotaops.h | 3 +++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/quota/compat.c b/fs/quota/compat.c index 779caed4f078..1577a2fd51f4 100644 --- a/fs/quota/compat.c +++ b/fs/quota/compat.c @@ -59,7 +59,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, case Q_GETQUOTA: dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); compat_dqblk = addr; - ret = sys_quotactl(cmd, special, id, dqblk); + ret = kernel_quotactl(cmd, special, id, dqblk); if (ret) break; if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) || @@ -75,12 +75,12 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, get_user(data, &compat_dqblk->dqb_valid) || put_user(data, &dqblk->dqb_valid)) break; - ret = sys_quotactl(cmd, special, id, dqblk); + ret = kernel_quotactl(cmd, special, id, dqblk); break; case Q_XGETQSTAT: fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat)); compat_fsqstat = addr; - ret = sys_quotactl(cmd, special, id, fsqstat); + ret = kernel_quotactl(cmd, special, id, fsqstat); if (ret) break; ret = -EFAULT; @@ -113,7 +113,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, ret = 0; break; default: - ret = sys_quotactl(cmd, special, id, addr); + ret = kernel_quotactl(cmd, special, id, addr); } return ret; } diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 43612e2a73af..860bfbe7a07a 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -833,8 +833,8 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) * calls. Maybe we need to add the process quotas etc. in the future, * but we probably should use rlimits for that. 
*/ -SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, - qid_t, id, void __user *, addr) +int kernel_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr) { uint cmds, type; struct super_block *sb = NULL; @@ -885,3 +885,9 @@ out: path_put(pathp); return ret; } + +SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, + qid_t, id, void __user *, addr) +{ + return kernel_quotactl(cmd, special, id, addr); +} diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 2fb6fb11132e..dc905a4ff8d7 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -27,6 +27,9 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); } +int kernel_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr); + #if defined(CONFIG_QUOTA) #define quota_error(sb, fmt, args...) \ -- cgit v1.2.3 From ab0d1e85bfd0c25260f02cd3708d5abdfb5b5a9c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 4 Mar 2018 21:54:05 +0100 Subject: fs/quota: use COMPAT_SYSCALL_DEFINE for sys32_quotactl() While sys32_quotactl() is only needed on x86, it can use the recommended COMPAT_SYSCALL_DEFINEx() machinery for its setup. 
Acked-by: Jan Kara Cc: Christoph Hellwig Signed-off-by: Dominik Brodowski --- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- fs/quota/compat.c | 5 +++-- include/linux/compat.h | 3 +++ include/linux/syscalls.h | 2 -- kernel/sys_ni.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ef6edaf285cd..c58f75b088c5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -137,7 +137,7 @@ 128 i386 init_module sys_init_module 129 i386 delete_module sys_delete_module 130 i386 get_kernel_syms -131 i386 quotactl sys_quotactl sys32_quotactl +131 i386 quotactl sys_quotactl compat_sys_quotactl32 132 i386 getpgid sys_getpgid 133 i386 fchdir sys_fchdir 134 i386 bdflush sys_bdflush diff --git a/fs/quota/compat.c b/fs/quota/compat.c index 1577a2fd51f4..c30572857619 100644 --- a/fs/quota/compat.c +++ b/fs/quota/compat.c @@ -41,8 +41,9 @@ struct compat_fs_quota_stat { __u16 qs_iwarnlimit; }; -asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr) +COMPAT_SYSCALL_DEFINE4(quotactl32, unsigned int, cmd, + const char __user *, special, qid_t, id, + void __user *, addr) { unsigned int cmds; struct if_dqblk __user *dqblk; diff --git a/include/linux/compat.h b/include/linux/compat.h index 16c3027074a2..f1649a5e6716 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -461,6 +461,9 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +asmlinkage long compat_sys_quotactl32(unsigned int cmd, + const char __user *special, qid_t id, void __user *addr); + #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a63e21e7a3af..6ab7ed71a8b6 
100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -241,8 +241,6 @@ static inline void addr_limit_user_check(void) #endif } -asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr); asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index b5189762d275..951dbda5c2b4 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -18,7 +18,7 @@ asmlinkage long sys_ni_syscall(void) } cond_syscall(sys_quotactl); -cond_syscall(sys32_quotactl); +cond_syscall(compat_sys_quotactl32); cond_syscall(sys_acct); cond_syscall(sys_lookup_dcookie); cond_syscall(compat_sys_lookup_dcookie); -- cgit v1.2.3 From 312db1aa1dc7bff133d95c92efcc5e42b57cefa6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:39 +0100 Subject: fs: add ksys_mount() helper; remove in-kernel calls to sys_mount() Using this helper allows us to avoid the in-kernel calls to the sys_mount() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mount(). In the near future, all callers of ksys_mount() should be converted to call do_mount() directly. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- drivers/base/devtmpfs.c | 5 +++-- fs/namespace.c | 10 ++++++++-- include/linux/syscalls.h | 3 +++ init/do_mounts.c | 4 ++-- init/do_mounts_initrd.c | 6 +++--- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 50025d7959cb..4afb04686c8e 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -356,7 +356,8 @@ int devtmpfs_mount(const char *mntdir) if (!thread) return 0; - err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL); + err = ksys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, + NULL); if (err) printk(KERN_INFO "devtmpfs: error mounting %i\n", err); else @@ -382,7 +383,7 @@ static int devtmpfsd(void *p) *err = sys_unshare(CLONE_NEWNS); if (*err) goto out; - *err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options); + *err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options); if (*err) goto out; sys_chdir("/.."); /* will traverse into overmounted root */ diff --git a/fs/namespace.c b/fs/namespace.c index 9d1374ab6e06..642b8b229944 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3032,8 +3032,8 @@ struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) } EXPORT_SYMBOL(mount_subtree); -SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, - char __user *, type, unsigned long, flags, void __user *, data) +int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, + unsigned long flags, void __user *data) { int ret; char *kernel_type; @@ -3066,6 +3066,12 @@ out_type: return ret; } +SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, + char __user *, type, unsigned long, flags, void __user *, data) +{ + return ksys_mount(dev_name, dir_name, type, flags, data); +} + /* * Return true if path is reachable from root * 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6ab7ed71a8b6..3a9f9c534624 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -946,4 +946,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, * the ksys_xyzyyz() functions prototyped below. */ +int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, + unsigned long flags, void __user *data); + #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index 7cf4f6dafd5f..eb768de43d84 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -363,7 +363,7 @@ static void __init get_fs_names(char *page) static int __init do_mount_root(char *name, char *fs, int flags, void *data) { struct super_block *s; - int err = sys_mount(name, "/root", fs, flags, data); + int err = ksys_mount(name, "/root", fs, flags, data); if (err) return err; @@ -599,7 +599,7 @@ void __init prepare_namespace(void) mount_root(); out: devtmpfs_mount("dev"); - sys_mount(".", "/", NULL, MS_MOVE, NULL); + ksys_mount(".", "/", NULL, MS_MOVE, NULL); sys_chroot("."); } diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 53d4f0f326e7..7868a6039fb4 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -43,7 +43,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) sys_dup(0); /* move initrd over / and chdir/chroot in initrd root */ sys_chdir("/root"); - sys_mount(".", "/", NULL, MS_MOVE, NULL); + ksys_mount(".", "/", NULL, MS_MOVE, NULL); sys_chroot("."); sys_setsid(); return 0; @@ -81,7 +81,7 @@ static void __init handle_initrd(void) current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - sys_mount("..", ".", NULL, MS_MOVE, NULL); + ksys_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ sys_chroot(".."); @@ -95,7 +95,7 @@ static void __init handle_initrd(void) mount_root(); printk(KERN_NOTICE "Trying to move old root to /initrd ...
"); - error = sys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + error = ksys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); if (!error) printk("okay\n"); else { -- cgit v1.2.3 From 3a18ef5c1b3935cb05888fc37964321f7bd6231d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:40 +0100 Subject: fs: add ksys_umount() helper; remove in-kernel call to sys_umount() Using this helper allows us to avoid the in-kernel call to the sys_umount() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_umount(). In the near future, the only fs-external caller of ksys_umount() should be converted to call do_umount() directly. Then, ksys_umount() can be moved within sys_umount() again. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/namespace.c | 9 +++++++-- include/linux/syscalls.h | 1 + init/do_mounts_initrd.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 642b8b229944..e398f32d7541 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1680,7 +1680,7 @@ static inline bool may_mandlock(void) * unixes.
Our API is identical to OSF/1 to avoid making a mess of AMD */ -SYSCALL_DEFINE2(umount, char __user *, name, int, flags) +int ksys_umount(char __user *name, int flags) { struct path path; struct mount *mnt; @@ -1720,6 +1720,11 @@ out: return retval; } +SYSCALL_DEFINE2(umount, char __user *, name, int, flags) +{ + return ksys_umount(name, flags); +} + #ifdef __ARCH_WANT_SYS_OLDUMOUNT /* @@ -1727,7 +1732,7 @@ out: */ SYSCALL_DEFINE1(oldumount, char __user *, name) { - return sys_umount(name, 0); + return ksys_umount(name, 0); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3a9f9c534624..48964c408c7b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -948,5 +948,6 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); +int ksys_umount(char __user *name, int flags); #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 7868a6039fb4..1c4da8353332 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -105,7 +105,7 @@ static void __init handle_initrd(void) else printk("failed\n"); printk(KERN_NOTICE "Unmounting old root\n"); - sys_umount("/old", MNT_DETACH); + ksys_umount("/old", MNT_DETACH); printk(KERN_NOTICE "Trying to free ramdisk memory ... "); if (fd < 0) { error = fd; -- cgit v1.2.3 From c7248321a3d42ffba78db0dde88d1c49ca1c045f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:40 +0100 Subject: fs: add ksys_dup{,3}() helper; remove in-kernel calls to sys_dup{,3}() Using ksys_dup() and ksys_dup3() as helper functions allows us to avoid the in-kernel calls to the sys_dup() and sys_dup3() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_dup{,3}(). 
In the near future, the fs-external callers of ksys_dup{,3}() should be converted to call do_dup2() directly. Then, ksys_dup{,3}() can be moved within sys_dup{,3}() again. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/file.c | 16 +++++++++++++--- include/linux/syscalls.h | 1 + init/do_mounts_initrd.c | 4 ++-- init/main.c | 4 ++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/file.c b/fs/file.c index 42f0db4bd0fb..d304004f0b65 100644 --- a/fs/file.c +++ b/fs/file.c @@ -870,7 +870,7 @@ out_unlock: return err; } -SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; struct file *file; @@ -904,6 +904,11 @@ out_unlock: return err; } +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +{ + return ksys_dup3(oldfd, newfd, flags); +} + SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ @@ -916,10 +921,10 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) rcu_read_unlock(); return retval; } - return sys_dup3(oldfd, newfd, 0); + return ksys_dup3(oldfd, newfd, 0); } -SYSCALL_DEFINE1(dup, unsigned int, fildes) +int ksys_dup(unsigned int fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); @@ -934,6 +939,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) return ret; } +SYSCALL_DEFINE1(dup, unsigned int, fildes) +{ + return ksys_dup(fildes); +} + int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 48964c408c7b..50876ae1d17b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -949,5 
+949,6 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); +int ksys_dup(unsigned int fildes); #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 1c4da8353332..e8573e1776f6 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -39,8 +39,8 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) sys_unshare(CLONE_FS | CLONE_FILES); /* stdin/stdout/stderr for /linuxrc */ sys_open("/dev/console", O_RDWR, 0); - sys_dup(0); - sys_dup(0); + ksys_dup(0); + ksys_dup(0); /* move initrd over / and chdir/chroot in initrd root */ sys_chdir("/root"); ksys_mount(".", "/", NULL, MS_MOVE, NULL); diff --git a/init/main.c b/init/main.c index 969eaf140ef0..b8649d1466e1 100644 --- a/init/main.c +++ b/init/main.c @@ -1077,8 +1077,8 @@ static noinline void __init kernel_init_freeable(void) if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) pr_err("Warning: unable to open an initial console.\n"); - (void) sys_dup(0); - (void) sys_dup(0); + (void) ksys_dup(0); + (void) ksys_dup(0); /* * check if there is an early userspace init. If yes, let it do all * the work -- cgit v1.2.3 From a16fe33ab5572e52ef4cb9719d6eb49623b2528a Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:41 +0100 Subject: fs: add ksys_chroot() helper; remove in-kernel calls to sys_chroot() Using this helper allows us to avoid the in-kernel calls to the sys_chroot() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_chroot(). In the near future, the fs-external callers of ksys_chroot() should be converted to use kern_path()/set_fs_root() directly. Then ksys_chroot() can be moved within sys_chroot() again.
This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- drivers/base/devtmpfs.c | 2 +- fs/open.c | 7 ++++++- include/linux/syscalls.h | 1 + init/do_mounts.c | 2 +- init/do_mounts_initrd.c | 4 ++-- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 4afb04686c8e..5743f04014ca 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -387,7 +387,7 @@ static int devtmpfsd(void *p) if (*err) goto out; sys_chdir("/.."); /* will traverse into overmounted root */ - sys_chroot("."); + ksys_chroot("."); complete(&setup_done); while (1) { spin_lock(&req_lock); diff --git a/fs/open.c b/fs/open.c index 7ea118471dce..7a475e8a2e41 100644 --- a/fs/open.c +++ b/fs/open.c @@ -479,7 +479,7 @@ out: return error; } -SYSCALL_DEFINE1(chroot, const char __user *, filename) +int ksys_chroot(const char __user *filename) { struct path path; int error; @@ -512,6 +512,11 @@ out: return error; } +SYSCALL_DEFINE1(chroot, const char __user *, filename) +{ + return ksys_chroot(filename); +} + static int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 50876ae1d17b..920a0db1871d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -950,5 +950,6 @@ int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); +int ksys_chroot(const char __user *filename); #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index eb768de43d84..2f06f7827b0c 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -600,7 +600,7 @@ void __init 
prepare_namespace(void) out: devtmpfs_mount("dev"); ksys_mount(".", "/", NULL, MS_MOVE, NULL); - sys_chroot("."); + ksys_chroot("."); } static bool is_tmpfs; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index e8573e1776f6..71293265ac4b 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -44,7 +44,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) /* move initrd over / and chdir/chroot in initrd root */ sys_chdir("/root"); ksys_mount(".", "/", NULL, MS_MOVE, NULL); - sys_chroot("."); + ksys_chroot("."); sys_setsid(); return 0; } @@ -83,7 +83,7 @@ static void __init handle_initrd(void) /* move initrd to rootfs' /old */ ksys_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ - sys_chroot(".."); + ksys_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { sys_chdir("/old"); -- cgit v1.2.3 From e7a3e8b2edf544ec28f689385c3adc2903f46ec0 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:41 +0100 Subject: fs: add ksys_write() helper; remove in-kernel calls to sys_write() Using this helper allows us to avoid the in-kernel calls to the sys_write() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_write(). In the near future, the do_mounts / initramfs callers of ksys_write() should be converted to use filp_open() and vfs_write() instead. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Cc: linux-s390@vger.kernel.org Signed-off-by: Dominik Brodowski --- arch/s390/kernel/compat_linux.c | 2 +- fs/read_write.c | 9 +++++++-- include/linux/syscalls.h | 1 + init/do_mounts_rd.c | 4 ++-- init/initramfs.c | 2 +- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 79b7a3438d54..5a9cfde5fc28 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, c if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_write(fd, buf, count); + return ksys_write(fd, buf, count); } /* diff --git a/fs/read_write.c b/fs/read_write.c index f8547b82dfb3..8e8f0b4f52e2 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -578,8 +578,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) return ret; } -SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, - size_t, count) +ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -595,6 +594,12 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, return ret; } +SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, + size_t, count) +{ + return ksys_write(fd, buf, count); +} + SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, size_t, count, loff_t, pos) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 920a0db1871d..80524faa9664 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -951,5 +951,6 @@ int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); +ssize_t 
ksys_write(unsigned int fd, const char __user *buf, size_t count); #endif diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 99e0b649fc0e..2d365c398ccc 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -270,7 +270,7 @@ int __init rd_load_image(char *from) printk("Loading disk #%d... ", disk); } sys_read(in_fd, buf, BLOCK_SIZE); - sys_write(out_fd, buf, BLOCK_SIZE); + ksys_write(out_fd, buf, BLOCK_SIZE); #if !defined(CONFIG_S390) if (!(i % 16)) { pr_cont("%c\b", rotator[rotate & 0x3]); @@ -317,7 +317,7 @@ static long __init compr_fill(void *buf, unsigned long len) static long __init compr_flush(void *window, unsigned long outcnt) { - long written = sys_write(crd_outfd, window, outcnt); + long written = ksys_write(crd_outfd, window, outcnt); if (written != outcnt) { if (decompress_error == 0) printk(KERN_ERR diff --git a/init/initramfs.c b/init/initramfs.c index 7e99a0038942..6f972df15bf2 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -27,7 +27,7 @@ static ssize_t __init xwrite(int fd, const char *p, size_t count) /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */ while (count) { - ssize_t rv = sys_write(fd, p, count); + ssize_t rv = ksys_write(fd, p, count); if (rv < 0) { if (rv == -EINTR || rv == -EAGAIN) -- cgit v1.2.3 From 447016e9681965fda8dcd9e4fd3c55308a6fd166 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:46 +0100 Subject: fs: add ksys_chdir() helper; remove in-kernel calls to sys_chdir() Using this helper allows us to avoid the in-kernel calls to the sys_chdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_chdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- drivers/base/devtmpfs.c | 2 +- fs/open.c | 7 ++++++- include/linux/syscalls.h | 1 + init/do_mounts.c | 2 +- init/do_mounts_initrd.c | 8 ++++---- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 5743f04014ca..3f7ee954fd2c 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -386,7 +386,7 @@ static int devtmpfsd(void *p) *err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options); if (*err) goto out; - sys_chdir("/.."); /* will traverse into overmounted root */ + ksys_chdir("/.."); /* will traverse into overmounted root */ ksys_chroot("."); complete(&setup_done); while (1) { diff --git a/fs/open.c b/fs/open.c index 7a475e8a2e41..a19b8277c439 100644 --- a/fs/open.c +++ b/fs/open.c @@ -431,7 +431,7 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) return sys_faccessat(AT_FDCWD, filename, mode); } -SYSCALL_DEFINE1(chdir, const char __user *, filename) +int ksys_chdir(const char __user *filename) { struct path path; int error; @@ -457,6 +457,11 @@ out: return error; } +SYSCALL_DEFINE1(chdir, const char __user *, filename) +{ + return ksys_chdir(filename); +} + SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80524faa9664..090645b48447 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -952,5 +952,6 @@ int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); +int ksys_chdir(const char __user *filename); #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index 2f06f7827b0c..89f18985fa90 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ 
-367,7 +367,7 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) if (err) return err; - sys_chdir("/root"); + ksys_chdir("/root"); s = current->fs->pwd.dentry->d_sb; ROOT_DEV = s->s_dev; printk(KERN_INFO diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 71293265ac4b..83f396d30b9a 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -42,7 +42,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) ksys_dup(0); ksys_dup(0); /* move initrd over / and chdir/chroot in initrd root */ - sys_chdir("/root"); + ksys_chdir("/root"); ksys_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); sys_setsid(); @@ -61,7 +61,7 @@ static void __init handle_initrd(void) /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); sys_mkdir("/old", 0700); - sys_chdir("/old"); + ksys_chdir("/old"); /* try loading default modules from initrd */ load_default_modules(); @@ -86,11 +86,11 @@ static void __init handle_initrd(void) ksys_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { - sys_chdir("/old"); + ksys_chdir("/old"); return; } - sys_chdir("/"); + ksys_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); mount_root(); -- cgit v1.2.3 From 0f32ab8cfac478be053cb526ced8918ef6f4df47 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:47 +0100 Subject: fs: add ksys_unlink() wrapper; remove in-kernel calls to sys_unlink() Using this wrapper allows us to avoid the in-kernel calls to the sys_unlink() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_unlink(). In the near future, all callers of ksys_unlink() should be converted to call do_unlinkat() directly or, at least, to operate on regular kernel pointers. This patch is part of a series which removes in-kernel calls to syscalls. 
On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 11 +++++++++++ init/do_mounts.h | 2 +- init/do_mounts_initrd.c | 4 ++-- init/do_mounts_rd.c | 2 +- init/initramfs.c | 4 ++-- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 090645b48447..7cbfb41e666b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -954,4 +954,15 @@ int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); +/* + * The following kernel syscall equivalents are just wrappers to fs-internal + * functions. Therefore, provide stubs to be inlined at the callsites. + */ +extern long do_unlinkat(int dfd, struct filename *name); + +static inline long ksys_unlink(const char __user *pathname) +{ + return do_unlinkat(AT_FDCWD, getname(pathname)); +} + #endif diff --git a/init/do_mounts.h b/init/do_mounts.h index 5b05c8f93f47..401f90ee1eeb 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -16,7 +16,7 @@ extern int root_mountflags; static inline int create_dev(char *name, dev_t dev) { - sys_unlink(name); + ksys_unlink(name); return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 83f396d30b9a..e9e9e1c67d31 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -128,11 +128,11 @@ bool __init initrd_load(void) * mounted in the normal path. 
*/ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { - sys_unlink("/initrd.image"); + ksys_unlink("/initrd.image"); handle_initrd(); return true; } } - sys_unlink("/initrd.image"); + ksys_unlink("/initrd.image"); return false; } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 2d365c398ccc..5b69056f610a 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -288,7 +288,7 @@ noclose_input: sys_close(out_fd); out: kfree(buf); - sys_unlink("/dev/ram"); + ksys_unlink("/dev/ram"); return res; } diff --git a/init/initramfs.c b/init/initramfs.c index 6f972df15bf2..08eb551168a8 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -319,7 +319,7 @@ static void __init clean_path(char *path, umode_t fmode) if (S_ISDIR(st.mode)) sys_rmdir(path); else - sys_unlink(path); + ksys_unlink(path); } } @@ -591,7 +591,7 @@ static void __init clean_rootfs(void) if (S_ISDIR(st.mode)) sys_rmdir(dirp->d_name); else - sys_unlink(dirp->d_name); + ksys_unlink(dirp->d_name); } num -= dirp->d_reclen; -- cgit v1.2.3 From 6380161ce9d08320d2e09f0fc64b778da433b451 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:48 +0100 Subject: hostfs: rename do_rmdir() to hostfs_do_rmdir() do_rmdir() is used in the VFS layer at fs/namei.c, so use a different name in hostfs. 
Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net Acked-by: Richard Weinberger Signed-off-by: Dominik Brodowski --- fs/hostfs/hostfs.h | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hostfs/hostfs_user.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index ffaec2e7526c..cb8374af08a6 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -84,7 +84,7 @@ extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd); extern int make_symlink(const char *from, const char *to); extern int unlink_file(const char *file); extern int do_mkdir(const char *file, int mode); -extern int do_rmdir(const char *file); +extern int hostfs_do_rmdir(const char *file); extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor); extern int link_file(const char *from, const char *to); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index c148e7f4f451..3cd85eb5bbb1 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -706,7 +706,7 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry) if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; - err = do_rmdir(file); + err = hostfs_do_rmdir(file); __putname(file); return err; } diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 9c1e0f019880..5ecc4706172b 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -304,7 +304,7 @@ int do_mkdir(const char *file, int mode) return 0; } -int do_rmdir(const char *file) +int hostfs_do_rmdir(const char *file) { int err; -- cgit v1.2.3 From f459dffae1c6026928bbe8e972daecb635b7b5e9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:48 +0100 Subject: fs: add ksys_rmdir() wrapper; remove in-kernel calls to sys_rmdir() Using this wrapper allows us to avoid the in-kernel calls to the sys_rmdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. 
In particular, it uses the same calling convention as sys_rmdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 1 + fs/namei.c | 2 +- include/linux/syscalls.h | 7 +++++++ init/initramfs.c | 4 ++-- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index df262f41a0ef..0eda35fa1743 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,6 +55,7 @@ extern void __init chrdev_init(void); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); +long do_rmdir(int dfd, const char __user *pathname); long do_unlinkat(int dfd, struct filename *name); /* diff --git a/fs/namei.c b/fs/namei.c index 524e829ffc7d..8545151f74e9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3872,7 +3872,7 @@ out: } EXPORT_SYMBOL(vfs_rmdir); -static long do_rmdir(int dfd, const char __user *pathname) +long do_rmdir(int dfd, const char __user *pathname) { int error = 0; struct filename *name; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7cbfb41e666b..746043a05884 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -965,4 +965,11 @@ static inline long ksys_unlink(const char __user *pathname) return do_unlinkat(AT_FDCWD, getname(pathname)); } +extern long do_rmdir(int dfd, const char __user *pathname); + +static inline long ksys_rmdir(const char __user *pathname) +{ + return do_rmdir(AT_FDCWD, pathname); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index 08eb551168a8..73bbb227f868 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -317,7 +317,7 @@ static void __init clean_path(char *path, umode_t fmode) 
if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) { if (S_ISDIR(st.mode)) - sys_rmdir(path); + ksys_rmdir(path); else ksys_unlink(path); } @@ -589,7 +589,7 @@ static void __init clean_rootfs(void) WARN_ON_ONCE(ret); if (!ret) { if (S_ISDIR(st.mode)) - sys_rmdir(dirp->d_name); + ksys_rmdir(dirp->d_name); else ksys_unlink(dirp->d_name); } -- cgit v1.2.3 From 0101db7a301981a008296d522d8c1f456b0fe837 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:49 +0100 Subject: fs: add do_mkdirat() helper and ksys_mkdir() wrapper; remove in-kernel calls to syscall Using the fs-internal do_mkdirat() helper allows us to get rid of fs-internal calls to the sys_mkdirat() syscall. Introducing the ksys_mkdir() wrapper allows us to avoid the in-kernel calls to the sys_mkdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mkdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 1 + fs/namei.c | 9 +++++++-- include/linux/syscalls.h | 7 +++++++ init/do_mounts_initrd.c | 2 +- init/initramfs.c | 2 +- init/noinitramfs.c | 4 ++-- 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 0eda35fa1743..53846bd4d9d7 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,6 +55,7 @@ extern void __init chrdev_init(void); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); +long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); long do_rmdir(int dfd, const char __user *pathname); long do_unlinkat(int dfd, struct filename *name); diff --git a/fs/namei.c b/fs/namei.c index 8545151f74e9..dcf506227509 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3803,7 +3803,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } EXPORT_SYMBOL(vfs_mkdir); -SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) +long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) { struct dentry *dentry; struct path path; @@ -3828,9 +3828,14 @@ retry: return error; } +SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) +{ + return do_mkdirat(dfd, pathname, mode); +} + SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) { - return sys_mkdirat(AT_FDCWD, pathname, mode); + return do_mkdirat(AT_FDCWD, pathname, mode); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 746043a05884..c982cb5f4e50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -972,4 +972,11 @@ static inline long ksys_rmdir(const char __user *pathname) 
return do_rmdir(AT_FDCWD, pathname); } +extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); + +static inline long ksys_mkdir(const char __user *pathname, umode_t mode) +{ + return do_mkdirat(AT_FDCWD, pathname, mode); +} + #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index e9e9e1c67d31..99922d1ebfe6 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -60,7 +60,7 @@ static void __init handle_initrd(void) create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); - sys_mkdir("/old", 0700); + ksys_mkdir("/old", 0700); ksys_chdir("/old"); /* try loading default modules from initrd */ diff --git a/init/initramfs.c b/init/initramfs.c index 73bbb227f868..ca538a5f9fa9 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -352,7 +352,7 @@ static int __init do_name(void) } } } else if (S_ISDIR(mode)) { - sys_mkdir(collected, mode); + ksys_mkdir(collected, mode); sys_chown(collected, uid, gid); sys_chmod(collected, mode); dir_add(collected, mtime); diff --git a/init/noinitramfs.c b/init/noinitramfs.c index 267739d85179..a08a9d937e60 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -29,7 +29,7 @@ static int __init default_rootfs(void) { int err; - err = sys_mkdir((const char __user __force *) "/dev", 0755); + err = ksys_mkdir((const char __user __force *) "/dev", 0755); if (err < 0) goto out; @@ -39,7 +39,7 @@ static int __init default_rootfs(void) if (err < 0) goto out; - err = sys_mkdir((const char __user __force *) "/root", 0700); + err = ksys_mkdir((const char __user __force *) "/root", 0700); if (err < 0) goto out; -- cgit v1.2.3 From b724e846b491ef8db943be8086226c9d8da31877 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:49 +0100 Subject: fs: add do_symlinkat() helper and ksys_symlink() wrapper; remove in-kernel calls to syscall Using the fs-internal do_symlinkat() helper allows us 
to get rid of fs-internal calls to the sys_symlinkat() syscall. Introducing the ksys_symlink() wrapper allows us to avoid the in-kernel calls to the sys_symlink() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_symlink(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 2 ++ fs/namei.c | 12 +++++++++--- include/linux/syscalls.h | 9 +++++++++ init/initramfs.c | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 53846bd4d9d7..a3f04ca2a08b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -58,6 +58,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); long do_rmdir(int dfd, const char __user *pathname); long do_unlinkat(int dfd, struct filename *name); +long do_symlinkat(const char __user *oldname, int newdfd, + const char __user *newname); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index dcf506227509..e15da92209d5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4113,8 +4113,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) } EXPORT_SYMBOL(vfs_symlink); -SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, - int, newdfd, const char __user *, newname) +long do_symlinkat(const char __user *oldname, int newdfd, + const char __user *newname) { int error; struct filename *from; @@ -4144,9 +4144,15 @@ out_putname: return error; } +SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, + int, newdfd, const char __user *, newname) +{ + return do_symlinkat(oldname, newdfd, newname); +} + SYSCALL_DEFINE2(symlink, const char __user 
*, oldname, const char __user *, newname) { - return sys_symlinkat(oldname, AT_FDCWD, newname); + return do_symlinkat(oldname, AT_FDCWD, newname); } /** diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c982cb5f4e50..39c5cef86a10 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -979,4 +979,13 @@ static inline long ksys_mkdir(const char __user *pathname, umode_t mode) return do_mkdirat(AT_FDCWD, pathname, mode); } +extern long do_symlinkat(const char __user *oldname, int newdfd, + const char __user *newname); + +static inline long ksys_symlink(const char __user *oldname, + const char __user *newname) +{ + return do_symlinkat(oldname, AT_FDCWD, newname); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index ca538a5f9fa9..cd9571a113b6 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -392,7 +392,7 @@ static int __init do_symlink(void) { collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); - sys_symlink(collected + N_ALIGN(name_len), collected); + ksys_symlink(collected + N_ALIGN(name_len), collected); sys_lchown(collected, uid, gid); do_utime(collected, mtime); state = SkipIt; -- cgit v1.2.3 From 87c4e19262d81862886207be3c8795f6576d5a52 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:50 +0100 Subject: fs: add do_mknodat() helper and ksys_mknod() wrapper; remove in-kernel calls to syscall Using the fs-internal do_mknodat() helper allows us to get rid of fs-internal calls to the sys_mknodat() syscall. Introducing the ksys_mknod() wrapper allows us to avoid the in-kernel calls to the sys_mknod() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mknod(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 2 ++ fs/namei.c | 12 +++++++++--- include/linux/syscalls.h | 9 +++++++++ init/do_mounts.h | 2 +- init/initramfs.c | 2 +- init/noinitramfs.c | 2 +- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index a3f04ca2a08b..4f0b67054c54 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,6 +55,8 @@ extern void __init chrdev_init(void); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); +long do_mknodat(int dfd, const char __user *filename, umode_t mode, + unsigned int dev); long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); long do_rmdir(int dfd, const char __user *pathname); long do_unlinkat(int dfd, struct filename *name); diff --git a/fs/namei.c b/fs/namei.c index e15da92209d5..8459a18cdd18 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3728,8 +3728,8 @@ static int may_mknod(umode_t mode) } } -SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, - unsigned, dev) +long do_mknodat(int dfd, const char __user *filename, umode_t mode, + unsigned int dev) { struct dentry *dentry; struct path path; @@ -3772,9 +3772,15 @@ out: return error; } +SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, + unsigned int, dev) +{ + return do_mknodat(dfd, filename, mode, dev); +} + SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev) { - return sys_mknodat(AT_FDCWD, filename, mode, dev); + return do_mknodat(AT_FDCWD, filename, mode, dev); } int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 39c5cef86a10..0b4fd684f0f1 100644 --- 
a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -988,4 +988,13 @@ static inline long ksys_symlink(const char __user *oldname, return do_symlinkat(oldname, AT_FDCWD, newname); } +extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, + unsigned int dev); + +static inline long ksys_mknod(const char __user *filename, umode_t mode, + unsigned int dev) +{ + return do_mknodat(AT_FDCWD, filename, mode, dev); +} + #endif diff --git a/init/do_mounts.h b/init/do_mounts.h index 401f90ee1eeb..0bb0806de4ce 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -17,7 +17,7 @@ extern int root_mountflags; static inline int create_dev(char *name, dev_t dev) { ksys_unlink(name); - return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); + return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } static inline u32 bstat(char *name) diff --git a/init/initramfs.c b/init/initramfs.c index cd9571a113b6..2972ed0ab399 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -359,7 +359,7 @@ static int __init do_name(void) } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { - sys_mknod(collected, mode, rdev); + ksys_mknod(collected, mode, rdev); sys_chown(collected, uid, gid); sys_chmod(collected, mode); do_utime(collected, mtime); diff --git a/init/noinitramfs.c b/init/noinitramfs.c index a08a9d937e60..f4bad8436c93 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -33,7 +33,7 @@ static int __init default_rootfs(void) if (err < 0) goto out; - err = sys_mknod((const char __user __force *) "/dev/console", + err = ksys_mknod((const char __user __force *) "/dev/console", S_IFCHR | S_IRUSR | S_IWUSR, new_encode_dev(MKDEV(5, 1))); if (err < 0) -- cgit v1.2.3 From 46ea89eb652a365e10257016d09dcf1aaf23cf63 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:53 +0100 Subject: fs: add do_linkat() helper and ksys_link() wrapper; remove in-kernel calls to syscall Using the 
fs-internal do_linkat() helper allows us to get rid of fs-internal calls to the sys_linkat() syscall. Introducing the ksys_link() wrapper allows us to avoid the in-kernel calls to the sys_link() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_link(). In the near future, the only fs-external user of ksys_link() should be converted to use vfs_link() instead. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 2 ++ fs/namei.c | 12 +++++++++--- include/linux/syscalls.h | 9 +++++++++ init/initramfs.c | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 4f0b67054c54..91e6fc93fcb5 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -62,6 +62,8 @@ long do_rmdir(int dfd, const char __user *pathname); long do_unlinkat(int dfd, struct filename *name); long do_symlinkat(const char __user *oldname, int newdfd, const char __user *newname); +int do_linkat(int olddfd, const char __user *oldname, int newdfd, + const char __user *newname, int flags); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index 8459a18cdd18..10148235829f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4250,8 +4250,8 @@ EXPORT_SYMBOL(vfs_link); * with linux 2.0, and to avoid hard-linking to directories * and other special files. 
--ADM */ -SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, - int, newdfd, const char __user *, newname, int, flags) +int do_linkat(int olddfd, const char __user *oldname, int newdfd, + const char __user *newname, int flags) { struct dentry *new_dentry; struct path old_path, new_path; @@ -4315,9 +4315,15 @@ out: return error; } +SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, int, flags) +{ + return do_linkat(olddfd, oldname, newdfd, newname, flags); +} + SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) { - return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } /** diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0b4fd684f0f1..827ed917630c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -997,4 +997,13 @@ static inline long ksys_mknod(const char __user *filename, umode_t mode, return do_mknodat(AT_FDCWD, filename, mode, dev); } +extern int do_linkat(int olddfd, const char __user *oldname, int newdfd, + const char __user *newname, int flags); + +static inline long ksys_link(const char __user *oldname, + const char __user *newname) +{ + return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index 2972ed0ab399..5855ab632b4e 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -306,7 +306,7 @@ static int __init maybe_link(void) if (nlink >= 2) { char *old = find_link(major, minor, ino, mode, collected); if (old) - return (sys_link(old, collected) < 0) ? -1 : 1; + return (ksys_link(old, collected) < 0) ? 
-1 : 1; } return 0; } -- cgit v1.2.3 From 03450e271a160bc07a2c48e5769e0ba338582d77 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:53 +0100 Subject: fs: add ksys_fchmod() and do_fchmodat() helpers and ksys_chmod() wrapper; remove in-kernel calls to syscall Using the fs-internal do_fchmodat() helper allows us to get rid of fs-internal calls to the sys_fchmodat() syscall. Introducing the ksys_fchmod() helper and the ksys_chmod() wrapper allows us to avoid the in-kernel calls to the sys_fchmod() and sys_chmod() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_fchmod() and sys_chmod(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 2 ++ fs/open.c | 17 ++++++++++++++--- include/linux/syscalls.h | 8 ++++++++ init/initramfs.c | 6 +++--- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 91e6fc93fcb5..2474bf460f96 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -119,6 +119,8 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname, extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); +int do_fchmodat(int dfd, const char __user *filename, umode_t mode); + extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file *filp_clone_open(struct file *); diff --git a/fs/open.c b/fs/open.c index a19b8277c439..6037f2bf418c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -551,7 +551,7 @@ out_unlock: return error; } -SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) +int 
ksys_fchmod(unsigned int fd, umode_t mode) { struct fd f = fdget(fd); int err = -EBADF; @@ -564,7 +564,12 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) return err; } -SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode) +SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) +{ + return ksys_fchmod(fd, mode); +} + +int do_fchmodat(int dfd, const char __user *filename, umode_t mode) { struct path path; int error; @@ -582,9 +587,15 @@ retry: return error; } +SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, + umode_t, mode) +{ + return do_fchmodat(dfd, filename, mode); +} + SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) { - return sys_fchmodat(AT_FDCWD, filename, mode); + return do_fchmodat(AT_FDCWD, filename, mode); } static int chown_common(const struct path *path, uid_t user, gid_t group) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 827ed917630c..dd6c306f4f00 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -953,6 +953,7 @@ int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); +int ksys_fchmod(unsigned int fd, umode_t mode); /* * The following kernel syscall equivalents are just wrappers to fs-internal @@ -1006,4 +1007,11 @@ static inline long ksys_link(const char __user *oldname, return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } +extern int do_fchmodat(int dfd, const char __user *filename, umode_t mode); + +static inline int ksys_chmod(const char __user *filename, umode_t mode) +{ + return do_fchmodat(AT_FDCWD, filename, mode); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index 5855ab632b4e..16c3c23076e2 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -344,7 +344,7 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); - 
sys_fchmod(wfd, mode); + ksys_fchmod(wfd, mode); if (body_len) sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); @@ -354,14 +354,14 @@ static int __init do_name(void) } else if (S_ISDIR(mode)) { ksys_mkdir(collected, mode); sys_chown(collected, uid, gid); - sys_chmod(collected, mode); + ksys_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { ksys_mknod(collected, mode, rdev); sys_chown(collected, uid, gid); - sys_chmod(collected, mode); + ksys_chmod(collected, mode); do_utime(collected, mtime); } } -- cgit v1.2.3 From cbfe20f565228966f0249f016752437df95df679 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:54 +0100 Subject: fs: add do_faccessat() helper and ksys_access() wrapper; remove in-kernel calls to syscall Using the fs-internal do_faccessat() helper allows us to get rid of fs-internal calls to the sys_faccessat() syscall. Introducing the ksys_access() wrapper allows us to avoid the in-kernel calls to the sys_access() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_access(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/internal.h | 1 + fs/open.c | 9 +++++++-- include/linux/syscalls.h | 7 +++++++ init/main.c | 3 ++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 2474bf460f96..26f4f05b52ef 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -119,6 +119,7 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname, extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); +long do_faccessat(int dfd, const char __user *filename, int mode); int do_fchmodat(int dfd, const char __user *filename, umode_t mode); extern int open_check_o_direct(struct file *f); diff --git a/fs/open.c b/fs/open.c index 6037f2bf418c..0fc8188be31a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -350,7 +350,7 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. 
*/ -SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) +long do_faccessat(int dfd, const char __user *filename, int mode) { const struct cred *old_cred; struct cred *override_cred; @@ -426,9 +426,14 @@ out: return res; } +SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) +{ + return do_faccessat(dfd, filename, mode); +} + SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { - return sys_faccessat(AT_FDCWD, filename, mode); + return do_faccessat(AT_FDCWD, filename, mode); } int ksys_chdir(const char __user *filename) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dd6c306f4f00..33f06de090ea 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1014,4 +1014,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode) return do_fchmodat(AT_FDCWD, filename, mode); } +extern long do_faccessat(int dfd, const char __user *filename, int mode); + +static inline long ksys_access(const char __user *filename, int mode) +{ + return do_faccessat(AT_FDCWD, filename, mode); +} + #endif diff --git a/init/main.c b/init/main.c index b8649d1466e1..d0ded4322c6b 100644 --- a/init/main.c +++ b/init/main.c @@ -1087,7 +1087,8 @@ static noinline void __init kernel_init_freeable(void) if (!ramdisk_execute_command) ramdisk_execute_command = "/init"; - if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { + if (ksys_access((const char __user *) + ramdisk_execute_command, 0) != 0) { ramdisk_execute_command = NULL; prepare_namespace(); } -- cgit v1.2.3 From 55731b3cda3a85ee888dac3bf1f36489f275c187 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:55 +0100 Subject: fs: add do_fchownat(), ksys_fchown() helpers and ksys_{,l}chown() wrappers Using the fs-internal do_fchownat() wrapper allows us to get rid of fs-internal calls to the sys_fchownat() syscall.
Introducing the ksys_fchown() helper and the ksys_{,l}chown() wrappers allows us to avoid the in-kernel calls to the sys_{,l,f}chown() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_{,l,f}chown(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/s390/kernel/compat_linux.c | 6 +++--- fs/internal.h | 2 ++ fs/open.c | 23 +++++++++++++++++------ include/linux/syscalls.h | 17 +++++++++++++++++ init/initramfs.c | 8 ++++---- kernel/uid16.c | 6 +++--- 6 files changed, 46 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 5a9cfde5fc28..9a9bb395359c 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -89,18 +89,18 @@ COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, u16, user, u16, group) { - return sys_chown(filename, low2highuid(user), low2highgid(group)); + return ksys_chown(filename, low2highuid(user), low2highgid(group)); } COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, filename, u16, user, u16, group) { - return sys_lchown(filename, low2highuid(user), low2highgid(group)); + return ksys_lchown(filename, low2highuid(user), low2highgid(group)); } COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) { - return sys_fchown(fd, low2highuid(user), low2highgid(group)); + return ksys_fchown(fd, low2highuid(user), low2highgid(group)); } COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) diff --git a/fs/internal.h b/fs/internal.h index 26f4f05b52ef..c797480cbd6f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -121,6 +121,8 @@ extern struct file
*do_file_open_root(struct dentry *, struct vfsmount *, long do_faccessat(int dfd, const char __user *filename, int mode); int do_fchmodat(int dfd, const char __user *filename, umode_t mode); +int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, + int flag); extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); diff --git a/fs/open.c b/fs/open.c index 0fc8188be31a..7b2eccb541f2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -645,8 +645,8 @@ retry_deleg: return error; } -SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, - gid_t, group, int, flag) +int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, + int flag) { struct path path; int error = -EINVAL; @@ -677,18 +677,24 @@ out: return error; } +SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, + gid_t, group, int, flag) +{ + return do_fchownat(dfd, filename, user, group, flag); +} + SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { - return sys_fchownat(AT_FDCWD, filename, user, group, 0); + return do_fchownat(AT_FDCWD, filename, user, group, 0); } SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) { - return sys_fchownat(AT_FDCWD, filename, user, group, - AT_SYMLINK_NOFOLLOW); + return do_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); } -SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) +int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { struct fd f = fdget(fd); int error = -EBADF; @@ -708,6 +714,11 @@ out: return error; } +SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) +{ + return ksys_fchown(fd, user, group); +} + int open_check_o_direct(struct file *f) { /* NB: we're sure to have correct a_ops only after f_op->open */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 
33f06de090ea..df0d1e818a6e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -954,6 +954,7 @@ int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchmod(unsigned int fd, umode_t mode); +int ksys_fchown(unsigned int fd, uid_t user, gid_t group); /* * The following kernel syscall equivalents are just wrappers to fs-internal @@ -1021,4 +1022,20 @@ static inline long ksys_access(const char __user *filename, int mode) return do_faccessat(AT_FDCWD, filename, mode); } +extern int do_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag); + +static inline long ksys_chown(const char __user *filename, uid_t user, + gid_t group) +{ + return do_fchownat(AT_FDCWD, filename, user, group, 0); +} + +static inline long ksys_lchown(const char __user *filename, uid_t user, + gid_t group) +{ + return do_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index 16c3c23076e2..35173bef7c00 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -343,7 +343,7 @@ static int __init do_name(void) wfd = sys_open(collected, openflags, mode); if (wfd >= 0) { - sys_fchown(wfd, uid, gid); + ksys_fchown(wfd, uid, gid); ksys_fchmod(wfd, mode); if (body_len) sys_ftruncate(wfd, body_len); @@ -353,14 +353,14 @@ static int __init do_name(void) } } else if (S_ISDIR(mode)) { ksys_mkdir(collected, mode); - sys_chown(collected, uid, gid); + ksys_chown(collected, uid, gid); ksys_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { ksys_mknod(collected, mode, rdev); - sys_chown(collected, uid, gid); + ksys_chown(collected, uid, gid); ksys_chmod(collected, mode); do_utime(collected, mtime); } @@ -393,7 +393,7 @@ static int __init do_symlink(void) 
collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); ksys_symlink(collected + N_ALIGN(name_len), collected); - sys_lchown(collected, uid, gid); + ksys_lchown(collected, uid, gid); do_utime(collected, mtime); state = SkipIt; next_state = Reset; diff --git a/kernel/uid16.c b/kernel/uid16.c index 7b930edfe461..af6925d8599b 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -22,17 +22,17 @@ SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { - return sys_chown(filename, low2highuid(user), low2highgid(group)); + return ksys_chown(filename, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { - return sys_lchown(filename, low2highuid(user), low2highgid(group)); + return ksys_lchown(filename, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) { - return sys_fchown(fd, low2highuid(user), low2highgid(group)); + return ksys_fchown(fd, low2highuid(user), low2highgid(group)); } SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) -- cgit v1.2.3 From 411d9475cf901b5a6d2996b46cb5726184a4fa50 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:54 +0100 Subject: fs: add ksys_ftruncate() wrapper; remove in-kernel calls to sys_ftruncate() Using the ksys_ftruncate() wrapper allows us to get rid of in-kernel calls to the sys_ftruncate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ftruncate(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 4 ++-- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/sparc/kernel/sys_sparc32.c | 2 +- arch/x86/ia32/sys_ia32.c | 2 +- fs/internal.h | 1 + fs/open.c | 2 +- include/linux/syscalls.h | 7 +++++++ init/initramfs.c | 2 +- 10 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index b332f6fc1e72..3c90449742a0 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -88,7 +88,7 @@ SYSCALL_DEFINE4(32_truncate64, const char __user *, path, SYSCALL_DEFINE4(32_ftruncate64, unsigned long, fd, unsigned long, __dummy, unsigned long, a2, unsigned long, a3) { - return sys_ftruncate(fd, merge_64(a2, a3)); + return ksys_ftruncate(fd, merge_64(a2, a3)); } SYSCALL_DEFINE5(32_llseek, unsigned int, fd, unsigned int, offset_high, diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 378a754ca186..6d2a64859c22 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -298,7 +298,7 @@ asmlinkage long parisc_truncate64(const char __user * path, asmlinkage long parisc_ftruncate64(unsigned int fd, unsigned int high, unsigned int low) { - return sys_ftruncate(fd, (long)high << 32 | low); + return ksys_ftruncate(fd, (long)high << 32 | low); } /* stubs for the benefit of the syscall_table since truncate64 and truncate @@ -309,7 +309,7 @@ asmlinkage long sys_truncate64(const char __user * path, unsigned long length) } asmlinkage long sys_ftruncate64(unsigned int fd, unsigned long length) { - return sys_ftruncate(fd, length); + return ksys_ftruncate(fd, length); } asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/arch/powerpc/kernel/sys_ppc32.c 
b/arch/powerpc/kernel/sys_ppc32.c index 15f216d022e2..e0c9b7f1bf38 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -107,7 +107,7 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, unsigned long low) { - return sys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, (high << 32) | low); } long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9a9bb395359c..9c5e975f71a6 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -307,7 +307,7 @@ COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u3 COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) { - return sys_ftruncate(fd, (unsigned long)high << 32 | low); + return ksys_ftruncate(fd, (unsigned long)high << 32 | low); } COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 6d964bdefbaa..d64b425fff93 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -65,7 +65,7 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned if ((int)high < 0) return -EINVAL; else - return sys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, (high << 32) | low); } static int cp_compat_stat64(struct kstat *stat, diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 23a5260eae67..1979e5b4ad9a 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -61,7 +61,7 @@ COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, unsigned long, offset_low, unsigned long, offset_high) { - return sys_ftruncate(fd, ((loff_t) 
offset_high << 32) | offset_low); + return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } /* diff --git a/fs/internal.h b/fs/internal.h index c797480cbd6f..980d005b21b4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -119,6 +119,7 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname, extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); +long do_sys_ftruncate(unsigned int fd, loff_t length, int small); long do_faccessat(int dfd, const char __user *filename, int mode); int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, diff --git a/fs/open.c b/fs/open.c index 7b2eccb541f2..b3f3b2cd9f19 100644 --- a/fs/open.c +++ b/fs/open.c @@ -162,7 +162,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length } #endif -static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index df0d1e818a6e..41023177c8ec 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1038,4 +1038,11 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } +extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); + +static inline long ksys_ftruncate(unsigned int fd, unsigned long length) +{ + return do_sys_ftruncate(fd, length, 1); +} + #endif diff --git a/init/initramfs.c b/init/initramfs.c index 35173bef7c00..0d3b001b0dc5 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -346,7 +346,7 @@ static int __init do_name(void) ksys_fchown(wfd, uid, gid); ksys_fchmod(wfd, mode); if (body_len) - sys_ftruncate(wfd, body_len); + ksys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } 
-- cgit v1.2.3 From 2ca2a09d6215fd9621aa3e2db7cc9428a61f2911 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:55 +0100 Subject: fs: add ksys_close() wrapper; remove in-kernel calls to sys_close() Using the ksys_close() wrapper allows us to get rid of in-kernel calls to the sys_close() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_close(), with one subtle difference: The few places which checked the return value did not care about the return value re-writing in sys_close(), so simply use a wrapper around __close_fd(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/autofs4/dev-ioctl.c | 2 +- fs/binfmt_misc.c | 2 +- fs/file.c | 1 + fs/open.c | 1 - include/linux/syscalls.h | 12 ++++++++++++ init/do_mounts.c | 4 ++-- init/do_mounts_initrd.c | 2 +- init/do_mounts_md.c | 8 ++++---- init/do_mounts_rd.c | 6 +++--- init/initramfs.c | 8 ++++---- 10 files changed, 29 insertions(+), 17 deletions(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index b7c816f39404..26f6b4f41ce6 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -310,7 +310,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - return sys_close(param->ioctlfd); + return ksys_close(param->ioctlfd); } /* diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a7c5a9861bef..a41b48f82a70 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -241,7 +241,7 @@ ret: return retval; error: if (fd_binary > 0) - sys_close(fd_binary); + ksys_close(fd_binary); bprm->interp_flags = 0; bprm->interp_data = 0; goto ret; diff --git a/fs/file.c 
b/fs/file.c index d304004f0b65..7ffd6e9d103d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -638,6 +638,7 @@ out_unlock: spin_unlock(&files->file_lock); return -EBADF; } +EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ void do_close_on_exec(struct files_struct *files) { diff --git a/fs/open.c b/fs/open.c index b3f3b2cd9f19..710102fc262b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1200,7 +1200,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) return retval; } -EXPORT_SYMBOL(sys_close); /* * This routine simulates a hangup on the tty, to arrange that users diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 41023177c8ec..38805f3447ea 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1045,4 +1045,16 @@ static inline long ksys_ftruncate(unsigned int fd, unsigned long length) return do_sys_ftruncate(fd, length, 1); } +extern int __close_fd(struct files_struct *files, unsigned int fd); + +/* + * In contrast to sys_close(), this stub does not check whether the syscall + * should or should not be restarted, but returns the raw error codes from + * __close_fd(). + */ +static inline int ksys_close(unsigned int fd) +{ + return __close_fd(current->files, fd); +} + #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index 89f18985fa90..a28dd42d1f84 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -492,7 +492,7 @@ void __init change_floppy(char *fmt, ...) fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0); if (fd >= 0) { sys_ioctl(fd, FDEJECT, 0); - sys_close(fd); + ksys_close(fd); } printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); fd = sys_open("/dev/console", O_RDWR, 0); @@ -503,7 +503,7 @@ void __init change_floppy(char *fmt, ...) 
sys_read(fd, &c, 1); termios.c_lflag |= ICANON; sys_ioctl(fd, TCSETSF, (long)&termios); - sys_close(fd); + ksys_close(fd); } } #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 99922d1ebfe6..6907c6dbc443 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -111,7 +111,7 @@ static void __init handle_initrd(void) error = fd; } else { error = sys_ioctl(fd, BLKFLSBUF, 0); - sys_close(fd); + ksys_close(fd); } printk(!error ? "okay\n" : "failed\n"); } diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 3f733c760a8c..ebd4013d589e 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -191,7 +191,7 @@ static void __init md_setup_drive(void) printk(KERN_WARNING "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", minor); - sys_close(fd); + ksys_close(fd); continue; } @@ -243,11 +243,11 @@ static void __init md_setup_drive(void) * boot a kernel with devfs compiled in from partitioned md * array without it */ - sys_close(fd); + ksys_close(fd); fd = sys_open(name, 0, 0); sys_ioctl(fd, BLKRRPART, 0); } - sys_close(fd); + ksys_close(fd); } } @@ -297,7 +297,7 @@ static void __init autodetect_raid(void) fd = sys_open("/dev/md0", 0, 0); if (fd >= 0) { sys_ioctl(fd, RAID_AUTORUN, raid_autopart); - sys_close(fd); + ksys_close(fd); } } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 5b69056f610a..f1aa341862d3 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -257,7 +257,7 @@ int __init rd_load_image(char *from) if (i && (i % devblocks == 0)) { printk("done disk #%d.\n", disk++); rotate = 0; - if (sys_close(in_fd)) { + if (ksys_close(in_fd)) { printk("Error closing the disk.\n"); goto noclose_input; } @@ -283,9 +283,9 @@ int __init rd_load_image(char *from) successful_load: res = 1; done: - sys_close(in_fd); + ksys_close(in_fd); noclose_input: - sys_close(out_fd); + ksys_close(out_fd); out: kfree(buf); ksys_unlink("/dev/ram"); diff --git a/init/initramfs.c b/init/initramfs.c index 
0d3b001b0dc5..ce2bcad97cdf 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -373,7 +373,7 @@ static int __init do_copy(void) if (byte_count >= body_len) { if (xwrite(wfd, victim, body_len) != body_len) error("write error"); - sys_close(wfd); + ksys_close(wfd); do_utime(vcollected, mtime); kfree(vcollected); eat(body_len); @@ -574,7 +574,7 @@ static void __init clean_rootfs(void) buf = kzalloc(BUF_SIZE, GFP_KERNEL); WARN_ON(!buf); if (!buf) { - sys_close(fd); + ksys_close(fd); return; } @@ -602,7 +602,7 @@ static void __init clean_rootfs(void) num = sys_getdents64(fd, dirp, BUF_SIZE); } - sys_close(fd); + ksys_close(fd); kfree(buf); } #endif @@ -639,7 +639,7 @@ static int __init populate_rootfs(void) pr_err("/initrd.image: incomplete write (%zd != %ld)\n", written, initrd_end - initrd_start); - sys_close(fd); + ksys_close(fd); free_initrd(); } done: -- cgit v1.2.3 From bae217ea8c7e123ed3fb1064909a262924771bbb Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:56 +0100 Subject: fs: add ksys_open() wrapper; remove in-kernel calls to sys_open() Using this wrapper allows us to avoid the in-kernel calls to the sys_open() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_open(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- fs/open.c | 2 +- include/linux/syscalls.h | 11 +++++++++++ init/do_mounts.c | 4 ++-- init/do_mounts_initrd.c | 4 ++-- init/do_mounts_md.c | 6 +++--- init/do_mounts_rd.c | 6 +++--- init/initramfs.c | 6 +++--- init/main.c | 2 +- 8 files changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/open.c b/fs/open.c index 710102fc262b..8a42a2961130 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1151,7 +1151,7 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { - return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); + return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 38805f3447ea..f9df17dcec1c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1057,4 +1057,15 @@ static inline int ksys_close(unsigned int fd) return __close_fd(current->files, fd); } +extern long do_sys_open(int dfd, const char __user *filename, int flags, + umode_t mode); + +static inline long ksys_open(const char __user *filename, int flags, + umode_t mode) +{ + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, filename, flags, mode); +} + #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index a28dd42d1f84..cc1103477071 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -489,13 +489,13 @@ void __init change_floppy(char *fmt, ...) 
va_start(args, fmt); vsprintf(buf, fmt, args); va_end(args); - fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0); + fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0); if (fd >= 0) { sys_ioctl(fd, FDEJECT, 0); ksys_close(fd); } printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); - fd = sys_open("/dev/console", O_RDWR, 0); + fd = ksys_open("/dev/console", O_RDWR, 0); if (fd >= 0) { sys_ioctl(fd, TCGETS, (long)&termios); termios.c_lflag &= ~ICANON; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 6907c6dbc443..cedca8fd2590 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -38,7 +38,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) { sys_unshare(CLONE_FS | CLONE_FILES); /* stdin/stdout/stderr for /linuxrc */ - sys_open("/dev/console", O_RDWR, 0); + ksys_open("/dev/console", O_RDWR, 0); ksys_dup(0); ksys_dup(0); /* move initrd over / and chdir/chroot in initrd root */ @@ -99,7 +99,7 @@ static void __init handle_initrd(void) if (!error) printk("okay\n"); else { - int fd = sys_open("/dev/root.old", O_RDWR, 0); + int fd = ksys_open("/dev/root.old", O_RDWR, 0); if (error == -ENOENT) printk("/initrd does not exist. Ignored.\n"); else diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index ebd4013d589e..76dcfaada3ed 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -181,7 +181,7 @@ static void __init md_setup_drive(void) partitioned ? 
"_d" : "", minor, md_setup_args[ent].device_names); - fd = sys_open(name, 0, 0); + fd = ksys_open(name, 0, 0); if (fd < 0) { printk(KERN_ERR "md: open failed - cannot start " "array %s\n", name); @@ -244,7 +244,7 @@ static void __init md_setup_drive(void) * array without it */ ksys_close(fd); - fd = sys_open(name, 0, 0); + fd = ksys_open(name, 0, 0); sys_ioctl(fd, BLKRRPART, 0); } ksys_close(fd); @@ -294,7 +294,7 @@ static void __init autodetect_raid(void) wait_for_device_probe(); - fd = sys_open("/dev/md0", 0, 0); + fd = ksys_open("/dev/md0", 0, 0); if (fd >= 0) { sys_ioctl(fd, RAID_AUTORUN, raid_autopart); ksys_close(fd); diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index f1aa341862d3..a6706314baa7 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -196,11 +196,11 @@ int __init rd_load_image(char *from) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif - out_fd = sys_open("/dev/ram", O_RDWR, 0); + out_fd = ksys_open("/dev/ram", O_RDWR, 0); if (out_fd < 0) goto out; - in_fd = sys_open(from, O_RDONLY, 0); + in_fd = ksys_open(from, O_RDONLY, 0); if (in_fd < 0) goto noclose_input; @@ -262,7 +262,7 @@ int __init rd_load_image(char *from) goto noclose_input; } change_floppy("disk #%d", disk); - in_fd = sys_open(from, O_RDONLY, 0); + in_fd = ksys_open(from, O_RDONLY, 0); if (in_fd < 0) { printk("Error opening disk.\n"); goto noclose_input; diff --git a/init/initramfs.c b/init/initramfs.c index ce2bcad97cdf..5f2ff1d2370e 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -340,7 +340,7 @@ static int __init do_name(void) int openflags = O_WRONLY|O_CREAT; if (ml != 1) openflags |= O_TRUNC; - wfd = sys_open(collected, openflags, mode); + wfd = ksys_open(collected, openflags, mode); if (wfd >= 0) { ksys_fchown(wfd, uid, gid); @@ -567,7 +567,7 @@ static void __init clean_rootfs(void) struct linux_dirent64 *dirp; int num; - fd = sys_open("/", O_RDONLY, 0); + fd = ksys_open("/", O_RDONLY, 0); WARN_ON(fd < 0); if (fd < 0) return; @@ -629,7 +629,7 @@ 
static int __init populate_rootfs(void) } printk(KERN_INFO "rootfs image is not initramfs (%s)" "; looks like an initrd\n", err); - fd = sys_open("/initrd.image", + fd = ksys_open("/initrd.image", O_WRONLY|O_CREAT, 0700); if (fd >= 0) { ssize_t written = xwrite(fd, (char *)initrd_start, diff --git a/init/main.c b/init/main.c index d0ded4322c6b..e77951ae2c19 100644 --- a/init/main.c +++ b/init/main.c @@ -1074,7 +1074,7 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); /* Open the /dev/console on the rootfs, this should never fail */ - if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) pr_err("Warning: unable to open an initial console.\n"); (void) ksys_dup(0); -- cgit v1.2.3 From 454dab3f965ec24fda8fbe135c8dad4c5b238a86 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:34:04 +0100 Subject: fs: add ksys_getdents64() helper; remove in-kernel calls to sys_getdents64() Using this helper allows us to avoid the in-kernel calls to the sys_getdents64() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_getdents64(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/readdir.c | 11 +++++++++-- include/linux/syscalls.h | 2 ++ init/initramfs.c | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index 1b83b0ad183b..d97f548e6323 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -292,8 +292,8 @@ efault: return -EFAULT; } -SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) +int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, + unsigned int count) { struct fd f; struct linux_dirent64 __user * lastdirent; @@ -326,6 +326,13 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, return error; } + +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) +{ + return ksys_getdents64(fd, dirent, count); +} + #ifdef CONFIG_COMPAT struct compat_old_linux_dirent { compat_ulong_t d_ino; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9df17dcec1c..c056aff6d7ad 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -955,6 +955,8 @@ ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); +int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, + unsigned int count); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/init/initramfs.c b/init/initramfs.c index 5f2ff1d2370e..13643c46ebab 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -579,7 +579,7 @@ static void __init clean_rootfs(void) } dirp = buf; - num = sys_getdents64(fd, dirp, BUF_SIZE); + num = ksys_getdents64(fd, dirp, BUF_SIZE); while (num > 0) { while (num > 0) { struct kstat st; @@ -599,7 +599,7 
@@ static void __init clean_rootfs(void) } dirp = buf; memset(buf, 0, BUF_SIZE); - num = sys_getdents64(fd, dirp, BUF_SIZE); + num = ksys_getdents64(fd, dirp, BUF_SIZE); } ksys_close(fd); -- cgit v1.2.3 From cbb60b924b9f3e4d7c67a1c9dcf981718f926e4e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:43:59 +0100 Subject: fs: add ksys_ioctl() helper; remove in-kernel calls to sys_ioctl() Using this helper allows us to avoid the in-kernel calls to the sys_ioctl() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ioctl(). After careful review, at least some of these calls could be converted to do_vfs_ioctl() in future. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/ioctl.c | 7 ++++++- include/linux/syscalls.h | 1 + init/do_mounts.c | 8 ++++---- init/do_mounts_initrd.c | 2 +- init/do_mounts_md.c | 15 ++++++++------- init/do_mounts_rd.c | 4 ++-- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 5ace7efb0d04..4823431d1c9d 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -689,7 +689,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, return error; } -SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) +int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { int error; struct fd f = fdget(fd); @@ -702,3 +702,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) fdput(f); return error; } + +SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) +{ + return ksys_ioctl(fd, cmd, arg); +} diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h 
index c056aff6d7ad..5a959efd8fb7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -957,6 +957,7 @@ int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); +int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/init/do_mounts.c b/init/do_mounts.c index cc1103477071..b17e0095eb4e 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -491,18 +491,18 @@ void __init change_floppy(char *fmt, ...) va_end(args); fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0); if (fd >= 0) { - sys_ioctl(fd, FDEJECT, 0); + ksys_ioctl(fd, FDEJECT, 0); ksys_close(fd); } printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); fd = ksys_open("/dev/console", O_RDWR, 0); if (fd >= 0) { - sys_ioctl(fd, TCGETS, (long)&termios); + ksys_ioctl(fd, TCGETS, (long)&termios); termios.c_lflag &= ~ICANON; - sys_ioctl(fd, TCSETSF, (long)&termios); + ksys_ioctl(fd, TCSETSF, (long)&termios); sys_read(fd, &c, 1); termios.c_lflag |= ICANON; - sys_ioctl(fd, TCSETSF, (long)&termios); + ksys_ioctl(fd, TCSETSF, (long)&termios); ksys_close(fd); } } diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index cedca8fd2590..03ec0c1b7553 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -110,7 +110,7 @@ static void __init handle_initrd(void) if (fd < 0) { error = fd; } else { - error = sys_ioctl(fd, BLKFLSBUF, 0); + error = ksys_ioctl(fd, BLKFLSBUF, 0); ksys_close(fd); } printk(!error ? 
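"okay\n" : "failed\n"); diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 76dcfaada3ed..7d85d172bc7e 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -187,7 +187,7 @@ static void __init md_setup_drive(void) "array %s\n", name); continue; } - if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { + if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { printk(KERN_WARNING "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", minor); @@ -210,7 +210,7 @@ static void __init md_setup_drive(void) ainfo.state = (1 << MD_SB_CLEAN); ainfo.layout = 0; ainfo.chunk_size = md_setup_args[ent].chunk; - err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); + err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); for (i = 0; !err && i <= MD_SB_DISKS; i++) { dev = devices[i]; if (!dev) @@ -220,7 +220,8 @@ static void __init md_setup_drive(void) dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC); dinfo.major = MAJOR(dev); dinfo.minor = MINOR(dev); - err = sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo); + err = ksys_ioctl(fd, ADD_NEW_DISK, + (long)&dinfo); } } else { /* persistent */ @@ -230,11 +231,11 @@ static void __init md_setup_drive(void) break; dinfo.major = MAJOR(dev); dinfo.minor = MINOR(dev); - sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo); + ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo); } } if (!err) - err = sys_ioctl(fd, RUN_ARRAY, 0); + err = ksys_ioctl(fd, RUN_ARRAY, 0); if (err) printk(KERN_WARNING "md: starting md%d failed\n", minor); else { @@ -245,7 +246,7 @@ static void __init md_setup_drive(void) */ ksys_close(fd); fd = ksys_open(name, 0, 0); - sys_ioctl(fd, BLKRRPART, 0); + ksys_ioctl(fd, BLKRRPART, 0); } ksys_close(fd); } @@ -296,7 +297,7 @@ static void __init autodetect_raid(void) fd = ksys_open("/dev/md0", 0, 0); if (fd >= 0) { - sys_ioctl(fd, RAID_AUTORUN, raid_autopart); + ksys_ioctl(fd, RAID_AUTORUN, raid_autopart); ksys_close(fd); } } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a6706314baa7..4dafaed5736f 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -218,7 +218,7 @@ int __init rd_load_image(char *from) * NOTE NOTE: nblocks is not actually blocks but * the number of kibibytes of data to load into a ramdisk. 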
*/ - if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) + if (ksys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) rd_blocks = 0; else rd_blocks >>= 1; @@ -232,7 +232,7 @@ int __init rd_load_image(char *from) /* * OK, time to copy in the data */ - if (sys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) + if (ksys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) devblocks = 0; else devblocks >>= 1; -- cgit v1.2.3 From 76847e4344350970e1c2e27c28b5abb3c588c5b3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:51:17 +0100 Subject: fs: add ksys_lseek() helper; remove in-kernel calls to sys_lseek() Using this helper allows us to avoid the in-kernel calls to the sys_lseek() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_lseek(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- fs/read_write.c | 9 +++++++-- include/linux/syscalls.h | 1 + init/do_mounts_rd.c | 8 ++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 8e8f0b4f52e2..b38b008a078e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) +off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t retval; struct fd f = fdget_pos(fd); @@ -319,10 +319,15 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) return retval; } +SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) +{ + return ksys_lseek(fd, offset, whence); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) { - return sys_lseek(fd, offset, whence); + return ksys_lseek(fd, offset, whence); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5a959efd8fb7..0f24e5334569 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -958,6 +958,7 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); +off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 4dafaed5736f..13e54148c0e0 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -90,7 +90,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 
0 to test for compressed kernel */ - sys_lseek(fd, start_block * BLOCK_SIZE, 0); + ksys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); *decompressor = decompress_method(buf, size, &compress_name); @@ -136,7 +136,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read 512 bytes further to check if cramfs is padded */ - sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); + ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); sys_read(fd, buf, size); if (cramfsb->magic == CRAMFS_MAGIC) { @@ -150,7 +150,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 1 to test for minix and ext2 superblock */ - sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); + ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); sys_read(fd, buf, size); /* Try minix */ @@ -178,7 +178,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) start_block); done: - sys_lseek(fd, start_block * BLOCK_SIZE, 0); + ksys_lseek(fd, start_block * BLOCK_SIZE, 0); kfree(buf); return nblocks; } -- cgit v1.2.3 From 3ce4a7bf66263748194b77ccefd284be963c6304 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:56:26 +0100 Subject: fs: add ksys_read() helper; remove in-kernel calls to sys_read() Using this helper allows us to avoid the in-kernel calls to the sys_read() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_read(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- arch/s390/kernel/compat_linux.c | 2 +- fs/read_write.c | 7 ++++++- include/linux/syscalls.h | 1 + init/do_mounts.c | 2 +- init/do_mounts_rd.c | 10 +++++----- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9c5e975f71a6..af0469f204fd 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -460,7 +460,7 @@ COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_s if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_read(fd, buf, count); + return ksys_read(fd, buf, count); } COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) diff --git a/fs/read_write.c b/fs/read_write.c index b38b008a078e..fc441e1ac683 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -568,7 +568,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) file->f_pos = pos; } -SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) +ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -583,6 +583,11 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) return ret; } +SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) +{ + return ksys_read(fd, buf, count); +} + ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) { struct fd f = fdget_pos(fd); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0f24e5334569..3a2e90842ff8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -959,6 +959,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); 
off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); +ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/init/do_mounts.c b/init/do_mounts.c index b17e0095eb4e..2c71dabe5626 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -500,7 +500,7 @@ void __init change_floppy(char *fmt, ...) ksys_ioctl(fd, TCGETS, (long)&termios); termios.c_lflag &= ~ICANON; ksys_ioctl(fd, TCSETSF, (long)&termios); - sys_read(fd, &c, 1); + ksys_read(fd, &c, 1); termios.c_lflag |= ICANON; ksys_ioctl(fd, TCSETSF, (long)&termios); ksys_close(fd); diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 13e54148c0e0..12c159824c7b 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -91,7 +91,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) * Read block 0 to test for compressed kernel */ ksys_lseek(fd, start_block * BLOCK_SIZE, 0); - sys_read(fd, buf, size); + ksys_read(fd, buf, size); *decompressor = decompress_method(buf, size, &compress_name); if (compress_name) { @@ -137,7 +137,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) * Read 512 bytes further to check if cramfs is padded */ ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); - sys_read(fd, buf, size); + ksys_read(fd, buf, size); if (cramfsb->magic == CRAMFS_MAGIC) { printk(KERN_NOTICE @@ -151,7 +151,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) * Read block 1 to test for minix and ext2 superblock */ ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); - sys_read(fd, buf, size); + ksys_read(fd, buf, size); /* Try minix */ if (minixsb->s_magic == MINIX_SUPER_MAGIC || @@ -269,7 +269,7 @@ int __init rd_load_image(char *from) } printk("Loading disk #%d... 
", disk); } - sys_read(in_fd, buf, BLOCK_SIZE); + ksys_read(in_fd, buf, BLOCK_SIZE); ksys_write(out_fd, buf, BLOCK_SIZE); #if !defined(CONFIG_S390) if (!(i % 16)) { @@ -307,7 +307,7 @@ static int crd_infd, crd_outfd; static long __init compr_fill(void *buf, unsigned long len) { - long r = sys_read(crd_infd, buf, len); + long r = ksys_read(crd_infd, buf, len); if (r < 0) printk(KERN_ERR "RAMDISK: error while reading compressed data"); else if (r == 0) -- cgit v1.2.3 From 70f68ee81e2e9ad5105b8d2bd324e890e94c6ad9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Mar 2018 22:35:11 +0100 Subject: fs: add ksys_sync() helper; remove in-kernel calls to sys_sync() Using this helper allows us to avoid the in-kernel calls to the sys_sync() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_sync(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- arch/sparc/kernel/setup_32.c | 2 +- drivers/tty/sysrq.c | 2 +- fs/sync.c | 7 ++++++- include/linux/syscalls.h | 1 + kernel/power/hibernate.c | 2 +- kernel/power/suspend.c | 2 +- kernel/power/user.c | 2 +- 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 2e3a3e203061..13664c377196 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -86,7 +86,7 @@ static void prom_sync_me(void) show_free_areas(0, NULL); if (!is_idle_task(current)) { local_irq_enable(); - sys_sync(); + ksys_sync(); local_irq_disable(); } prom_printf("Returning to prom\n"); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index b674793be478..6364890575ec 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -660,7 +660,7 @@ static void sysrq_do_reset(struct timer_list *t) state->reset_requested = true; - sys_sync(); + ksys_sync(); kernel_restart(NULL); } diff --git a/fs/sync.c b/fs/sync.c index 6e0a2cbaf6de..602ae94bb67e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -105,7 +105,7 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) * just write metadata (such as inodes or bitmaps) to block device page cache * and do not sync it on their own in ->sync_fs(). 
*/ -SYSCALL_DEFINE0(sync) +void ksys_sync(void) { int nowait = 0, wait = 1; @@ -117,6 +117,11 @@ SYSCALL_DEFINE0(sync) iterate_bdevs(fdatawait_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); +} + +SYSCALL_DEFINE0(sync) +{ + ksys_sync(); return 0; } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3a2e90842ff8..0a9942b3e718 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -960,6 +960,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); +void ksys_sync(void); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a5c36e9c56a6..4710f1b142fc 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -701,7 +701,7 @@ int hibernate(void) } pr_info("Syncing filesystems ... \n"); - sys_sync(); + ksys_sync(); pr_info("done.\n"); error = freeze_processes(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0685c4499431..4c10be0f4843 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -560,7 +560,7 @@ static int enter_state(suspend_state_t state) #ifndef CONFIG_SUSPEND_SKIP_SYNC trace_suspend_resume(TPS("sync_filesystems"), 0, true); pr_info("Syncing filesystems ... "); - sys_sync(); + ksys_sync(); pr_cont("done.\n"); trace_suspend_resume(TPS("sync_filesystems"), 0, false); #endif diff --git a/kernel/power/user.c b/kernel/power/user.c index 22df9f7ff672..75c959de4b29 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -224,7 +224,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; printk("Syncing filesystems ... 
"); - sys_sync(); + ksys_sync(); printk("done.\n"); error = freeze_processes(); -- cgit v1.2.3 From 9b32105ec6b13d32d5db6a6e7992c97ce54b5ea7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:42 +0100 Subject: kernel: add ksys_unshare() helper; remove in-kernel calls to sys_unshare() Using this helper allows us to avoid the in-kernel calls to the sys_unshare() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_unshare(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Cc: Ingo Molnar Signed-off-by: Dominik Brodowski --- drivers/base/devtmpfs.c | 2 +- include/linux/syscalls.h | 1 + init/do_mounts_initrd.c | 2 +- kernel/fork.c | 7 ++++++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 3f7ee954fd2c..f7768077e817 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -380,7 +380,7 @@ static int devtmpfsd(void *p) { char options[] = "mode=0755"; int *err = p; - *err = sys_unshare(CLONE_NEWNS); + *err = ksys_unshare(CLONE_NEWNS); if (*err) goto out; *err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0a9942b3e718..e724dda509e0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -961,6 +961,7 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); +int ksys_unshare(unsigned long unshare_flags); /* * The following kernel syscall equivalents are just wrappers to fs-internal 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 03ec0c1b7553..d1d3e53bdeef 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -36,7 +36,7 @@ __setup("noinitrd", no_initrd); static int init_linuxrc(struct subprocess_info *info, struct cred *new) { - sys_unshare(CLONE_FS | CLONE_FILES); + ksys_unshare(CLONE_FS | CLONE_FILES); /* stdin/stdout/stderr for /linuxrc */ ksys_open("/dev/console", O_RDWR, 0); ksys_dup(0); diff --git a/kernel/fork.c b/kernel/fork.c index b1e031aac9db..f71b67dc156d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2354,7 +2354,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) +int ksys_unshare(unsigned long unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *fd, *new_fd = NULL; @@ -2470,6 +2470,11 @@ bad_unshare_out: return err; } +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) +{ + return ksys_unshare(unshare_flags); +} + /* * Helper to unshare the files of the current task. * We don't want to expose copy_files internals to -- cgit v1.2.3 From e2aaa9f423367ee03755d632555c242629a08d00 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 12:36:06 +0100 Subject: kernel: add ksys_setsid() helper; remove in-kernel call to sys_setsid() Using this helper allows us to avoid the in-kernel call to the sys_setsid() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_setsid(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + init/do_mounts_initrd.c | 2 +- kernel/sys.c | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e724dda509e0..4dd685ee425d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -962,6 +962,7 @@ off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); +int ksys_setsid(void); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d1d3e53bdeef..5a91aefa7305 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -45,7 +45,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) ksys_chdir("/root"); ksys_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); - sys_setsid(); + ksys_setsid(); return 0; } diff --git a/kernel/sys.c b/kernel/sys.c index 550f47788ae4..ad692183dfe9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1150,7 +1150,7 @@ static void set_special_pids(struct pid *pid) change_pid(curr, PIDTYPE_PGID, pid); } -SYSCALL_DEFINE0(setsid) +int ksys_setsid(void) { struct task_struct *group_leader = current->group_leader; struct pid *sid = task_pid(group_leader); @@ -1183,6 +1183,11 @@ out: return err; } +SYSCALL_DEFINE0(setsid) +{ + return ksys_setsid(); +} + DECLARE_RWSEM(uts_sem); #ifdef COMPAT_UTS_MACHINE -- cgit v1.2.3 From 806cbae1228cc1a19b978c4513f6851e9ab7f388 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:47 +0100 Subject: fs: add ksys_sync_file_range() helper; remove in-kernel calls to syscall Using this helper allows us to avoid the in-kernel calls to the sys_sync_file_range() 
syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_sync_file_range(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 2 +- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/sparc/kernel/sys_sparc32.c | 2 +- arch/x86/ia32/sys_ia32.c | 6 +++--- fs/sync.c | 12 +++++++++--- include/linux/syscalls.h | 2 ++ 8 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 3c90449742a0..57b3310873f0 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -139,7 +139,7 @@ asmlinkage long sys32_sync_file_range(int fd, int __pad, unsigned long a4, unsigned long a5, int flags) { - return sys_sync_file_range(fd, + return ksys_sync_file_range(fd, merge_64(a2, a3), merge_64(a4, a5), flags); } diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 6d2a64859c22..ef995b5d97ef 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -360,7 +360,7 @@ asmlinkage long parisc_sync_file_range(int fd, u32 hi_off, u32 lo_off, u32 hi_nbytes, u32 lo_nbytes, unsigned int flags) { - return sys_sync_file_range(fd, (loff_t)hi_off << 32 | lo_off, + return ksys_sync_file_range(fd, (loff_t)hi_off << 32 | lo_off, (loff_t)hi_nbytes << 32 | lo_nbytes, flags); } diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index e0c9b7f1bf38..664e4c5b5855 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -124,5 +124,5 @@ asmlinkage long compat_sys_sync_file_range2(int 
fd, unsigned int flags, loff_t offset = ((loff_t)offset_hi << 32) | offset_lo; loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo; - return sys_sync_file_range(fd, offset, nbytes, flags); + return ksys_sync_file_range(fd, offset, nbytes, flags); } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index af0469f204fd..605f6f026e44 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -509,7 +509,7 @@ COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, arg COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, u32, nhigh, u32, nlow, unsigned int, flags) { - return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, ((u64)nhigh << 32) + nlow, flags); } diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index d64b425fff93..af18b9ff86be 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -241,7 +241,7 @@ long compat_sys_fadvise64_64(int fd, long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, unsigned int flags) { - return sys_sync_file_range(fd, + return ksys_sync_file_range(fd, (off_high << 32) | off_low, (nb_high << 32) | nb_low, flags); diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 1979e5b4ad9a..0c5f8932c136 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -209,9 +209,9 @@ COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, unsigned int, off_hi, unsigned int, n_low, unsigned int, n_hi, int, flags) { - return sys_sync_file_range(fd, - ((u64)off_hi << 32) | off_low, - ((u64)n_hi << 32) | n_low, flags); + return ksys_sync_file_range(fd, + ((u64)off_hi << 32) | off_low, + ((u64)n_hi << 32) | n_low, flags); } COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, 
offset_lo, diff --git a/fs/sync.c b/fs/sync.c index 602ae94bb67e..9908a114d506 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -285,8 +285,8 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) * already-instantiated disk blocks, there are no guarantees here that the data * will be available after a crash. */ -SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, - unsigned int, flags) +int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags) { int ret; struct fd f; @@ -364,10 +364,16 @@ out: return ret; } +SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, + unsigned int, flags) +{ + return ksys_sync_file_range(fd, offset, nbytes, flags); +} + /* It would be nice if people remember that not all the world's an i386 when they introduce new system calls */ SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags, loff_t, offset, loff_t, nbytes) { - return sys_sync_file_range(fd, offset, nbytes, flags); + return ksys_sync_file_range(fd, offset, nbytes, flags); } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4dd685ee425d..331da76f66e2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -963,6 +963,8 @@ ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); int ksys_setsid(void); +int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From df260e21e6cd5d2dfc1fe9b6a3bbf747e72b3bed Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:32:11 +0100 Subject: fs: add ksys_truncate() wrapper; remove in-kernel calls to sys_truncate() Using the ksys_truncate() wrapper allows us to get rid of in-kernel calls to the sys_truncate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. 
In particular, it uses the same calling convention as sys_truncate(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 6 +++--- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/sparc/kernel/sys_sparc32.c | 2 +- arch/x86/ia32/sys_ia32.c | 3 ++- fs/open.c | 2 +- include/linux/syscalls.h | 7 +++++++ 8 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 57b3310873f0..58e7dd27f106 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -82,7 +82,7 @@ struct rlimit32 { SYSCALL_DEFINE4(32_truncate64, const char __user *, path, unsigned long, __dummy, unsigned long, a2, unsigned long, a3) { - return sys_truncate(path, merge_64(a2, a3)); + return ksys_truncate(path, merge_64(a2, a3)); } SYSCALL_DEFINE4(32_ftruncate64, unsigned long, fd, unsigned long, __dummy, diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ef995b5d97ef..3377a00163c3 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -292,7 +292,7 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, asmlinkage long parisc_truncate64(const char __user * path, unsigned int high, unsigned int low) { - return sys_truncate(path, (long)high << 32 | low); + return ksys_truncate(path, (long)high << 32 | low); } asmlinkage long parisc_ftruncate64(unsigned int fd, @@ -305,7 +305,7 @@ asmlinkage long parisc_ftruncate64(unsigned int fd, * are identical on LP64 */ asmlinkage long sys_truncate64(const char __user * path, unsigned long length) { - return sys_truncate(path, length); + return ksys_truncate(path, length); 
} asmlinkage long sys_ftruncate64(unsigned int fd, unsigned long length) { @@ -320,7 +320,7 @@ asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg asmlinkage long parisc_truncate64(const char __user * path, unsigned int high, unsigned int low) { - return sys_truncate64(path, (loff_t)high << 32 | low); + return ksys_truncate(path, (loff_t)high << 32 | low); } asmlinkage long parisc_ftruncate64(unsigned int fd, diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 664e4c5b5855..a0a251d7c9c5 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -94,7 +94,7 @@ compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 co asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, unsigned long high, unsigned long low) { - return sys_truncate(path, (high << 32) | low); + return ksys_truncate(path, (high << 32) | low); } asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 605f6f026e44..e4092ec17ea5 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -302,7 +302,7 @@ COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) { - return sys_truncate(path, (unsigned long)high << 32 | low); + return ksys_truncate(path, (unsigned long)high << 32 | low); } COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index af18b9ff86be..aab2428b17b5 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -57,7 +57,7 @@ asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, u if ((int)high < 0) return -EINVAL; else - return sys_truncate(path, (high << 32) | 
low); + return ksys_truncate(path, (high << 32) | low); } asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 0c5f8932c136..90e7a2b3dc5b 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -55,7 +55,8 @@ COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, unsigned long, offset_low, unsigned long, offset_high) { - return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); + return ksys_truncate(filename, + ((loff_t) offset_high << 32) | offset_low); } COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, diff --git a/fs/open.c b/fs/open.c index 8a42a2961130..2e816fc7bd56 100644 --- a/fs/open.c +++ b/fs/open.c @@ -128,7 +128,7 @@ out: } EXPORT_SYMBOL_GPL(vfs_truncate); -static long do_sys_truncate(const char __user *pathname, loff_t length) +long do_sys_truncate(const char __user *pathname, loff_t length) { unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 331da76f66e2..78b79e3a1279 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1078,4 +1078,11 @@ static inline long ksys_open(const char __user *filename, int flags, return do_sys_open(AT_FDCWD, filename, flags, mode); } +extern long do_sys_truncate(const char __user *pathname, loff_t length); + +static inline long ksys_truncate(const char __user *pathname, loff_t length) +{ + return do_sys_truncate(pathname, length); +} + #endif -- cgit v1.2.3 From 36028d5dd71175c332ab634e089e16dbdfe3812b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:38:31 +0100 Subject: fs: add ksys_p{read,write}64() helpers; remove in-kernel calls to syscalls Using the ksys_p{read,write}64() wrappers allows us to get rid of in-kernel calls to the sys_pread64() and sys_pwrite64() syscalls. 
The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_p{read,write}64(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 4 ++-- arch/parisc/kernel/sys_parisc.c | 4 ++-- arch/powerpc/kernel/sys_ppc32.c | 4 ++-- arch/s390/kernel/compat_linux.c | 4 ++-- arch/sh/kernel/sys_sh32.c | 4 ++-- arch/sparc/kernel/sys_sparc32.c | 4 ++-- arch/x86/ia32/sys_ia32.c | 8 ++++---- fs/read_write.c | 20 ++++++++++++++++---- include/linux/syscalls.h | 4 ++++ 9 files changed, 36 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 58e7dd27f106..91e85e2f8aa5 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -105,13 +105,13 @@ SYSCALL_DEFINE5(32_llseek, unsigned int, fd, unsigned int, offset_high, SYSCALL_DEFINE6(32_pread, unsigned long, fd, char __user *, buf, size_t, count, unsigned long, unused, unsigned long, a4, unsigned long, a5) { - return sys_pread64(fd, buf, count, merge_64(a4, a5)); + return ksys_pread64(fd, buf, count, merge_64(a4, a5)); } SYSCALL_DEFINE6(32_pwrite, unsigned int, fd, const char __user *, buf, size_t, count, u32, unused, u64, a4, u64, a5) { - return sys_pwrite64(fd, buf, count, merge_64(a4, a5)); + return ksys_pwrite64(fd, buf, count, merge_64(a4, a5)); } SYSCALL_DEFINE1(32_personality, unsigned long, personality) diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 3377a00163c3..4f47e8379854 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -333,13 +333,13 @@ asmlinkage long parisc_ftruncate64(unsigned int fd, asmlinkage ssize_t parisc_pread64(unsigned 
int fd, char __user *buf, size_t count, unsigned int high, unsigned int low) { - return sys_pread64(fd, buf, count, (loff_t)high << 32 | low); + return ksys_pread64(fd, buf, count, (loff_t)high << 32 | low); } asmlinkage ssize_t parisc_pwrite64(unsigned int fd, const char __user *buf, size_t count, unsigned int high, unsigned int low) { - return sys_pwrite64(fd, buf, count, (loff_t)high << 32 | low); + return ksys_pwrite64(fd, buf, count, (loff_t)high << 32 | low); } asmlinkage ssize_t parisc_readahead(int fd, unsigned int high, unsigned int low, diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index a0a251d7c9c5..36a651e6e033 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -77,13 +77,13 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len, compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 reg6, u32 poshi, u32 poslo) { - return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); } compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 reg6, u32 poshi, u32 poslo) { - return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); } compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index e4092ec17ea5..71e2c5bc4926 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -315,7 +315,7 @@ COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); + return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); } COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned 
int, fd, const char __user *, ubuf, @@ -323,7 +323,7 @@ COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubu { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); + return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); } COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index f8dc8bfd4606..c37ee3d0c803 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -39,13 +39,13 @@ asmlinkage int sys_sh_pipe(void) asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf, size_t count, long dummy, loff_t pos) { - return sys_pread64(fd, buf, count, pos); + return ksys_pread64(fd, buf, count, pos); } asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf, size_t count, long dummy, loff_t pos) { - return sys_pwrite64(fd, buf, count, pos); + return ksys_pwrite64(fd, buf, count, pos); } asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index aab2428b17b5..40c0d2e1db1d 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -200,7 +200,7 @@ asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, unsigned long poshi, unsigned long poslo) { - return sys_pread64(fd, ubuf, count, (poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, (poshi << 32) | poslo); } asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd, @@ -209,7 +209,7 @@ asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd, unsigned long poshi, unsigned long poslo) { - return sys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo); } asmlinkage long compat_sys_readahead(int fd, diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 
90e7a2b3dc5b..c08c25f73d79 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -174,15 +174,15 @@ COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, u32, count, u32, poslo, u32, poshi) { - return sys_pread64(fd, ubuf, count, - ((loff_t)AA(poshi) << 32) | AA(poslo)); + return ksys_pread64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); } COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, u32, count, u32, poslo, u32, poshi) { - return sys_pwrite64(fd, ubuf, count, - ((loff_t)AA(poshi) << 32) | AA(poslo)); + return ksys_pwrite64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); } diff --git a/fs/read_write.c b/fs/read_write.c index fc441e1ac683..c4eabbfc90df 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -610,8 +610,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, return ksys_write(fd, buf, count); } -SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, - size_t, count, loff_t, pos) +ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, + loff_t pos) { struct fd f; ssize_t ret = -EBADF; @@ -630,8 +630,14 @@ SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, return ret; } -SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, - size_t, count, loff_t, pos) +SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, + size_t, count, loff_t, pos) +{ + return ksys_pread64(fd, buf, count, pos); +} + +ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos) { struct fd f; ssize_t ret = -EBADF; @@ -650,6 +656,12 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, return ret; } +SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, + size_t, count, loff_t, pos) +{ + return ksys_pwrite64(fd, buf, count, pos); +} + static ssize_t do_iter_readv_writev(struct file 
*filp, struct iov_iter *iter, loff_t *ppos, int type, rwf_t flags) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 78b79e3a1279..a30e4c2d0c27 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -965,6 +965,10 @@ int ksys_unshare(unsigned long unshare_flags); int ksys_setsid(void); int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags); +ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, + loff_t pos); +ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From edf292c76b884a499cc60ad5cdada2663cc39a2f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:46:32 +0100 Subject: fs: add ksys_fallocate() wrapper; remove in-kernel calls to sys_fallocate() Using the ksys_fallocate() wrapper allows us to get rid of in-kernel calls to the sys_fallocate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_fallocate(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 4 ++-- arch/parisc/kernel/sys_parisc.c | 4 ++-- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 4 ++-- arch/sparc/kernel/sys_sparc32.c | 4 ++-- arch/x86/ia32/sys_ia32.c | 4 ++-- fs/open.c | 7 ++++++- include/linux/syscalls.h | 1 + 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 91e85e2f8aa5..0779d474c8ad 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -157,6 +157,6 @@ asmlinkage long sys32_fadvise64_64(int fd, int __pad, asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_a2, unsigned offset_a3, unsigned len_a4, unsigned len_a5) { - return sys_fallocate(fd, mode, merge_64(offset_a2, offset_a3), - merge_64(len_a4, len_a5)); + return ksys_fallocate(fd, mode, merge_64(offset_a2, offset_a3), + merge_64(len_a4, len_a5)); } diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 4f47e8379854..588fab336ddd 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -367,8 +367,8 @@ asmlinkage long parisc_sync_file_range(int fd, asmlinkage long parisc_fallocate(int fd, int mode, u32 offhi, u32 offlo, u32 lenhi, u32 lenlo) { - return sys_fallocate(fd, mode, ((u64)offhi << 32) | offlo, - ((u64)lenhi << 32) | lenlo); + return ksys_fallocate(fd, mode, ((u64)offhi << 32) | offlo, + ((u64)lenhi << 32) | lenlo); } long parisc_personality(unsigned long personality) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 36a651e6e033..68f11e1065f8 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -100,7 +100,7 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, asmlinkage long compat_sys_fallocate(int fd, int 
mode, u32 offhi, u32 offlo, u32 lenhi, u32 lenlo) { - return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, ((loff_t)lenhi << 32) | lenlo); } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 71e2c5bc4926..039858f9f128 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -516,6 +516,6 @@ COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, u32, lenhigh, u32, lenlow) { - return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, - ((u64)lenhigh << 32) + lenlow); + return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); } diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 40c0d2e1db1d..4ba62d676632 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -250,6 +250,6 @@ long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned lon asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, u32 lenhi, u32 lenlo) { - return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, - ((loff_t)lenhi << 32) | lenlo); + return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); } diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index c08c25f73d79..df2acb13623f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -226,8 +226,8 @@ COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, unsigned int, offset_lo, unsigned int, offset_hi, unsigned int, len_lo, unsigned int, len_hi) { - return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, - ((u64)len_hi << 32) | len_lo); + return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, + ((u64)len_hi << 32) | len_lo); } /* diff --git 
a/fs/open.c b/fs/open.c index 2e816fc7bd56..d0e955b558ad 100644 --- a/fs/open.c +++ b/fs/open.c @@ -333,7 +333,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } EXPORT_SYMBOL_GPL(vfs_fallocate); -SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) +int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len) { struct fd f = fdget(fd); int error = -EBADF; @@ -345,6 +345,11 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) return error; } +SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) +{ + return ksys_fallocate(fd, mode, offset, len); +} + /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a30e4c2d0c27..613b8127834d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -969,6 +969,7 @@ ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos); ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); +int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 9d5b7c956b09daab955fb2a42447d5d89ff15093 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:45 +0100 Subject: mm: add ksys_fadvise64_64() helper; remove in-kernel call to sys_fadvise64_64() Using the ksys_fadvise64_64() helper allows us to avoid the in-kernel calls to the sys_fadvise64_64() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_fadvise64_64(). Some compat stubs called sys_fadvise64(), which then just passed through the arguments to sys_fadvise64_64().
Get rid of this indirection, and call ksys_fadvise64_64() directly. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- arch/arm/kernel/sys_arm.c | 2 +- arch/mips/kernel/linux32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 2 +- arch/powerpc/kernel/sys_ppc32.c | 4 ++-- arch/powerpc/kernel/syscalls.c | 4 ++-- arch/s390/kernel/compat_linux.c | 5 +++-- arch/sh/kernel/sys_sh32.c | 8 ++++---- arch/sparc/kernel/sys_sparc32.c | 10 +++++----- arch/x86/ia32/sys_ia32.c | 12 ++++++------ arch/xtensa/kernel/syscall.c | 2 +- include/linux/syscalls.h | 9 +++++++++ mm/fadvise.c | 10 ++++++++-- 12 files changed, 43 insertions(+), 27 deletions(-) diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 3151f5623d0e..bdf7514204ab 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -35,5 +35,5 @@ asmlinkage long sys_arm_fadvise64_64(int fd, int advice, loff_t offset, loff_t len) { - return sys_fadvise64_64(fd, offset, len, advice); + return ksys_fadvise64_64(fd, offset, len, advice); } diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 0779d474c8ad..1c5785e72db4 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -149,7 +149,7 @@ asmlinkage long sys32_fadvise64_64(int fd, int __pad, unsigned long a4, unsigned long a5, int flags) { - return sys_fadvise64_64(fd, + return ksys_fadvise64_64(fd, merge_64(a2, a3), merge_64(a4, a5), flags); } diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 588fab336ddd..f36ab1f09595 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -352,7 +352,7 @@ asmlinkage long parisc_fadvise64_64(int fd, unsigned int high_off, unsigned int low_off, unsigned int high_len, 
unsigned int low_len, int advice) { - return sys_fadvise64_64(fd, (loff_t)high_off << 32 | low_off, + return ksys_fadvise64_64(fd, (loff_t)high_off << 32 | low_off, (loff_t)high_len << 32 | low_len, advice); } diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 68f11e1065f8..0b95fa13307f 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -113,8 +113,8 @@ asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long h long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, size_t len, int advice) { - return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len, - advice); + return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len, + advice); } asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index a877bf8269fe..ecb981eea74b 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -119,8 +119,8 @@ long ppc64_personality(unsigned long personality) long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { - return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, - (u64)len_high << 32 | len_low, advice); + return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, + (u64)len_high << 32 | len_low, advice); } long sys_switch_endian(void) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 039858f9f128..9bb897e443a6 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -483,7 +483,8 @@ COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size advise = POSIX_FADV_DONTNEED; else if (advise == 5) advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise); + return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len, + advise); } struct 
fadvise64_64_args { @@ -503,7 +504,7 @@ COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, arg a.advice = POSIX_FADV_DONTNEED; else if (a.advice == 5) a.advice = POSIX_FADV_NOREUSE; - return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); + return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index c37ee3d0c803..9dca568509a5 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -52,10 +52,10 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, u32 len0, u32 len1, int advice) { #ifdef __LITTLE_ENDIAN__ - return sys_fadvise64_64(fd, (u64)offset1 << 32 | offset0, - (u64)len1 << 32 | len0, advice); + return ksys_fadvise64_64(fd, (u64)offset1 << 32 | offset0, + (u64)len1 << 32 | len0, advice); #else - return sys_fadvise64_64(fd, (u64)offset0 << 32 | offset1, - (u64)len0 << 32 | len1, advice); + return ksys_fadvise64_64(fd, (u64)offset0 << 32 | offset1, + (u64)len0 << 32 | len1, advice); #endif } diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 4ba62d676632..4da66aed50b4 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -225,7 +225,7 @@ long compat_sys_fadvise64(int fd, unsigned long offlo, compat_size_t len, int advice) { - return sys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice); + return ksys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice); } long compat_sys_fadvise64_64(int fd, @@ -233,10 +233,10 @@ long compat_sys_fadvise64_64(int fd, unsigned long lenhi, unsigned long lenlo, int advice) { - return sys_fadvise64_64(fd, - (offhi << 32) | offlo, - (lenhi << 32) | lenlo, - advice); + return ksys_fadvise64_64(fd, + (offhi << 32) | offlo, + (lenhi << 32) | lenlo, + advice); } long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned long 
off_low, unsigned long nb_high, unsigned long nb_low, unsigned int flags) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index df2acb13623f..401bd8ec9cf0 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -194,10 +194,10 @@ COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, __u32, offset_high, __u32, len_low, __u32, len_high, int, advice) { - return sys_fadvise64_64(fd, - (((u64)offset_high)<<32) | offset_low, - (((u64)len_high)<<32) | len_low, - advice); + return ksys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); } COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, @@ -218,8 +218,8 @@ COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, unsigned int, offset_hi, size_t, len, int, advice) { - return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, - len, advice); + return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, + len, advice); } COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 74afbf02d07e..8201748da05b 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -55,7 +55,7 @@ asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) asmlinkage long xtensa_fadvise64_64(int fd, int advice, unsigned long long offset, unsigned long long len) { - return sys_fadvise64_64(fd, offset, len, advice); + return ksys_fadvise64_64(fd, offset, len, advice); } #ifdef CONFIG_MMU diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 613b8127834d..466d408deefd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -970,6 +970,15 @@ ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, 
loff_t pos); int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len); +#ifdef CONFIG_ADVISE_SYSCALLS +int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); +#else +static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, + int advice) +{ + return -EINVAL; +} +#endif /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/mm/fadvise.c b/mm/fadvise.c index 767887f5f3bf..afa41491d324 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -26,7 +26,8 @@ * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ -SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) + +int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) { struct fd f = fdget(fd); struct inode *inode; @@ -185,11 +186,16 @@ out: return ret; } +SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) +{ + return ksys_fadvise64_64(fd, offset, len, advice); +} + #ifdef __ARCH_WANT_SYS_FADVISE64 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) { - return sys_fadvise64_64(fd, offset, len, advice); + return ksys_fadvise64_64(fd, offset, len, advice); } #endif -- cgit v1.2.3 From a90f590a1bee36fc2129cfb38ceec24a555bb12d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:46 +0100 Subject: mm: add ksys_mmap_pgoff() helper; remove in-kernel calls to sys_mmap_pgoff() Using this helper allows us to avoid the in-kernel calls to the sys_mmap_pgoff() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mmap_pgoff(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- arch/alpha/kernel/osf_sys.c | 2 +- arch/arm64/kernel/sys.c | 2 +- arch/ia64/kernel/sys_ia64.c | 4 ++-- arch/m68k/kernel/sys_m68k.c | 2 +- arch/microblaze/kernel/sys_microblaze.c | 6 +++--- arch/mips/kernel/linux32.c | 4 ++-- arch/mips/kernel/syscall.c | 6 ++++-- arch/parisc/kernel/sys_parisc.c | 6 +++--- arch/powerpc/kernel/syscalls.c | 2 +- arch/riscv/kernel/sys_riscv.c | 4 ++-- arch/s390/kernel/compat_linux.c | 6 +++--- arch/s390/kernel/sys_s390.c | 2 +- arch/sh/kernel/sys_sh.c | 4 ++-- arch/sparc/kernel/sys_sparc_32.c | 6 +++--- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/um/kernel/syscall.c | 2 +- arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/kernel/sys_x86_64.c | 2 +- include/linux/syscalls.h | 3 +++ mm/mmap.c | 17 ++++++++++++----- mm/nommu.c | 17 ++++++++++++----- 21 files changed, 60 insertions(+), 41 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index fa1a392ca9a2..89faa6f4de47 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -189,7 +189,7 @@ SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, goto out; if (off & ~PAGE_MASK) goto out; - ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return ret; } diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 26fe8ea93ea2..72981bae10eb 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -34,7 +34,7 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, if (offset_in_page(off) != 0) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) diff --git 
a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 085adfcc74a4..9ebe1d633abc 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -139,7 +139,7 @@ int ia64_mmap_check(unsigned long addr, unsigned long len, asmlinkage unsigned long sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) { - addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; @@ -151,7 +151,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo if (offset_in_page(off) != 0) return -EINVAL; - addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 27e10af5153a..6363ec83a290 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -46,7 +46,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, * so we need to shift the argument down by 1; m68k mmap64(3) * (in libc) expects the last argument of mmap2 in 4Kb units. 
*/ - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* Convert virtual (user) address VADDR to physical address PADDR */ diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index f1e1f666ddde..ed9f34da1a2a 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -40,7 +40,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, if (pgoff & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); } SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, @@ -50,6 +50,6 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT - 12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 1c5785e72db4..0571ab7b68b0 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -67,8 +67,8 @@ SYSCALL_DEFINE6(32_mmap2, unsigned long, addr, unsigned long, len, { if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT-12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT-12)); } #define RLIM_INFINITY32 0x7fffffff diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 58c6f634b550..69c17b549fd3 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -63,7 +63,8 @@ SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len, { if (offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, 
+ offset >> PAGE_SHIFT); } SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, @@ -73,7 +74,8 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } save_static_function(sys_fork); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index f36ab1f09595..080d566654ea 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -270,8 +270,8 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT - 12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, @@ -279,7 +279,7 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, unsigned long offset) { if (!(offset & ~PAGE_MASK)) { - return sys_mmap_pgoff(addr, len, prot, flags, fd, + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); } else { return -EINVAL; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index ecb981eea74b..1ef3b80b62a6 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -57,7 +57,7 @@ static inline long do_mmap2(unsigned long addr, size_t len, off >>= shift; } - ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off); + ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off); out: return ret; } diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 79c78668258e..f7181ed8aafc 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -24,8 +24,8 @@ static long 
riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, - offset >> (PAGE_SHIFT - page_shift_offset)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> (PAGE_SHIFT - page_shift_offset)); } #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9bb897e443a6..da5ef7718254 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -442,8 +442,8 @@ COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg return -EFAULT; if (a.offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) @@ -452,7 +452,7 @@ COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 0090037ab148..31cefe0c28c0 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -53,7 +53,7 @@ SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) if (copy_from_user(&a, arg, sizeof(a))) goto out; - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); out: return error; } diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 724911c59e7d..f8afc014e084 100644 --- a/arch/sh/kernel/sys_sh.c +++ 
b/arch/sh/kernel/sys_sh.c @@ -35,7 +35,7 @@ asmlinkage int old_mmap(unsigned long addr, unsigned long len, { if (off & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, off>>PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off>>PAGE_SHIFT); } asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, @@ -51,7 +51,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, pgoff >>= PAGE_SHIFT - 12; - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* sys_cacheflush -- flush (part of) the processor cache. */ diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 990703b7cf4d..d980da4ffd7b 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -104,8 +104,8 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT - 12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } asmlinkage long sys_mmap(unsigned long addr, unsigned long len, @@ -113,7 +113,7 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long off) { /* no alignment check? 
*/ - return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } long sparc_remap_file_pages(unsigned long start, unsigned long size, diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 55416db482ad..ebb84dc8a5a7 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -458,7 +458,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, goto out; if (off & ~PAGE_MASK) goto out; - retval = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + retval = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return retval; } diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 6258676bed85..35f7047bdebc 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -22,7 +22,7 @@ long old_mmap(unsigned long addr, unsigned long len, if (offset & ~PAGE_MASK) goto out; - err = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + err = ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); out: return err; } diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 401bd8ec9cf0..bff71b9ae3f5 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -166,7 +166,7 @@ COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) if (a.offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset>>PAGE_SHIFT); } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 676774b9bb8d..a3f15ed545b5 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -97,7 +97,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, if (off & ~PAGE_MASK) goto out; - error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + error = 
ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return error; } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 466d408deefd..ec866c959e7d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -979,6 +979,9 @@ static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, return -EINVAL; } #endif +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/mm/mmap.c b/mm/mmap.c index 9efdc021ad22..aa0dc8231c0d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1488,9 +1488,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return addr; } -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, pgoff) +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval; @@ -1537,6 +1537,13 @@ out_fput: return retval; } +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); +} + #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; @@ -1556,8 +1563,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) if (offset_in_page(a.offset)) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ diff --git a/mm/nommu.c b/mm/nommu.c index ebb6e618dade..cad329629530 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1423,9 
+1423,9 @@ error_getting_region: return -ENOMEM; } -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, pgoff) +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval = -EBADF; @@ -1447,6 +1447,13 @@ out: return retval; } +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); +} + #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; @@ -1466,8 +1473,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) if (offset_in_page(a.offset)) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ -- cgit v1.2.3 From c7b95d5156a9ee70f800bd2e47a9eba677be73e1 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:51:36 +0100 Subject: mm: add ksys_readahead() helper; remove in-kernel calls to sys_readahead() Using this helper allows us to avoid the in-kernel calls to the sys_readahead() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_readahead(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- arch/mips/kernel/linux32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 2 +- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/sparc/kernel/sys_sparc32.c | 2 +- arch/x86/ia32/sys_ia32.c | 2 +- include/linux/syscalls.h | 1 + mm/readahead.c | 7 ++++++- 8 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 0571ab7b68b0..318f1c05c5b3 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -131,7 +131,7 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality) asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3, size_t count) { - return sys_readahead(fd, merge_64(a2, a3), count); + return ksys_readahead(fd, merge_64(a2, a3), count); } asmlinkage long sys32_sync_file_range(int fd, int __pad, diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 080d566654ea..8c99ebbe2bac 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -345,7 +345,7 @@ asmlinkage ssize_t parisc_pwrite64(unsigned int fd, const char __user *buf, asmlinkage ssize_t parisc_readahead(int fd, unsigned int high, unsigned int low, size_t count) { - return sys_readahead(fd, (loff_t)high << 32 | low, count); + return ksys_readahead(fd, (loff_t)high << 32 | low, count); } asmlinkage long parisc_fadvise64_64(int fd, diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 0b95fa13307f..c11c73373691 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -88,7 +88,7 @@ compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, com compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) { - return sys_readahead(fd, ((loff_t)offhi << 32) | 
offlo, count); + return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); } asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index da5ef7718254..8ac38d51ed7d 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -328,7 +328,7 @@ COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubu COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) { - return sys_readahead(fd, (unsigned long)high << 32 | low, count); + return ksys_readahead(fd, (unsigned long)high << 32 | low, count); } struct stat64_emu31 { diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 4da66aed50b4..f166e5bbf506 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -217,7 +217,7 @@ asmlinkage long compat_sys_readahead(int fd, unsigned long offlo, compat_size_t count) { - return sys_readahead(fd, (offhi << 32) | offlo, count); + return ksys_readahead(fd, (offhi << 32) | offlo, count); } long compat_sys_fadvise64(int fd, diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index bff71b9ae3f5..bd8a7020b9a7 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -203,7 +203,7 @@ COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, unsigned int, off_hi, size_t, count) { - return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); + return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ec866c959e7d..815fbdd9cca1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -982,6 +982,7 @@ static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, unsigned long 
ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); +ssize_t ksys_readahead(int fd, loff_t offset, size_t count); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/mm/readahead.c b/mm/readahead.c index c4ca70239233..4d57b4644f98 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -573,7 +573,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return force_page_cache_readahead(mapping, filp, index, nr); } -SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) +ssize_t ksys_readahead(int fd, loff_t offset, size_t count) { ssize_t ret; struct fd f; @@ -592,3 +592,8 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) } return ret; } + +SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) +{ + return ksys_readahead(fd, offset, count); +} -- cgit v1.2.3 From 66f4e88cc69da7d9ec4d68cf370cc69742d4af81 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:38 +0100 Subject: x86/ioport: add ksys_ioperm() helper; remove in-kernel calls to sys_ioperm() Using this helper allows us to avoid the in-kernel calls to the sys_ioperm() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ioperm(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/syscalls.h | 1 + arch/x86/kernel/ioport.c | 7 ++++++- drivers/tty/vt/vt_ioctl.c | 6 +++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index bad25bb80679..1c0bebbd039e 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -17,6 +17,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 38deafebb21b..0fe1c8782208 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@ /* * this changes the io permissions bitmap in the current task. 
*/ -SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct *t = &current->thread; struct tss_struct *tss; @@ -96,6 +96,11 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) return 0; } +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +{ + return ksys_ioperm(from, num, turn_on); +} + /* * sys_iopl has to be used when you want to access the IO ports * beyond the 0x3ff range: to get the full 65536 ports bitmapped diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index d61be307256a..a78ad10a119b 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -57,7 +57,7 @@ extern struct tty_driver *console_driver; */ #ifdef CONFIG_X86 -#include +#include #endif static void complete_change_console(struct vc_data *vc); @@ -420,12 +420,12 @@ int vt_ioctl(struct tty_struct *tty, ret = -EINVAL; break; } - ret = sys_ioperm(arg, 1, (cmd == KDADDIO)) ? -ENXIO : 0; + ret = ksys_ioperm(arg, 1, (cmd == KDADDIO)) ? -ENXIO : 0; break; case KDENABIO: case KDDISABIO: - ret = sys_ioperm(GPFIRST, GPNUM, + ret = ksys_ioperm(GPFIRST, GPNUM, (cmd == KDENABIO)) ? -ENXIO : 0; break; #endif -- cgit v1.2.3 From 025bd3905acab2cdfeb1a521491bee5e33a8fc90 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 08:29:36 +0100 Subject: x86: fix sys_sigreturn() return type to be long, not unsigned long Same as with other system calls, sys_sigreturn() should return a value of type long, not unsigned long. This also matches the behaviour for IA32_EMULATION, see sys32_sigreturn() in arch/x86/ia32/ia32_signal.c .
Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Cc: Michael Tautschnig Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 1c0bebbd039e..ae6e05fdc24b 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -35,7 +35,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage unsigned long sys_sigreturn(void); +asmlinkage long sys_sigreturn(void); /* kernel/vm86_32.c */ struct vm86_struct; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4cdc0b27ec82..83a26726b689 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,7 +601,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -asmlinkage unsigned long sys_sigreturn(void) +asmlinkage long sys_sigreturn(void) { struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; -- cgit v1.2.3 From 4c8ca51af730cce5cc5fdc7605f8aa20575c0731 Mon Sep 17 00:00:00 2001 From: "Tautschnig, Michael" Date: Wed, 14 Mar 2018 09:41:42 +0000 Subject: x86/sigreturn: use SYSCALL_DEFINE0 All definitions of syscalls in x86 except for those patched here have already been using the appropriate SYSCALL_DEFINE*. Signed-off-by: Michael Tautschnig Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. 
Peter Anvin Cc: Jaswinder Singh Cc: Andi Kleen Cc: x86@kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/kernel/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 83a26726b689..da270b95fe4d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -601,7 +602,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -asmlinkage long sys_sigreturn(void) +SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; @@ -633,7 +634,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; -- cgit v1.2.3 From 1bec510a9ebf00baa1aa8751e4a5d88b54efb748 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 17:46:57 +0100 Subject: kexec: move sys_kexec_load() prototype to syscalls.h As the syscall function should only be called from the system call table but not from elsewhere in the kernel, move the prototype for sys_kexec_load() to include/linux/syscalls.h.
Cc: Eric Biederman Cc: kexec@lists.infradead.org Signed-off-by: Dominik Brodowski --- include/linux/kexec.h | 4 ---- include/linux/syscalls.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f16f6ceb3875..0ebcbeb21056 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -223,10 +223,6 @@ struct kimage { extern void machine_kexec(struct kimage *image); extern int machine_kexec_prepare(struct kimage *image); extern void machine_kexec_cleanup(struct kimage *image); -extern asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); extern int kernel_kexec(void); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 815fbdd9cca1..8330f046541e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -936,6 +936,10 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); -- cgit v1.2.3 From 3c1c456f9b96c208c9dc9ad7aa3be36b8d488504 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 25 Mar 2018 21:50:11 +0200 Subject: syscalls: sort syscall prototypes in include/linux/syscalls.h Shuffle the syscall prototypes in include/linux/syscalls.h around so that they are kept in the same order as in include/uapi/asm-generic/unistd.h. 
The individual entries are kept the same, and neither modified to bring them in line with kernel coding style nor wrapped in proper ifdefs. Cc: Arnd Bergmann Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1226 +++++++++++++++++++++++++--------------------- 1 file changed, 678 insertions(+), 548 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8330f046541e..1f223b7cf16d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -241,223 +241,28 @@ static inline void addr_limit_user_check(void) #endif } -asmlinkage long sys_time(time_t __user *tloc); -asmlinkage long sys_stime(time_t __user *tptr); -asmlinkage long sys_gettimeofday(struct timeval __user *tv, - struct timezone __user *tz); -asmlinkage long sys_settimeofday(struct timeval __user *tv, - struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); - -asmlinkage long sys_times(struct tms __user *tbuf); - -asmlinkage long sys_gettid(void); -asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); -asmlinkage long sys_alarm(unsigned int seconds); -asmlinkage long sys_getpid(void); -asmlinkage long sys_getppid(void); -asmlinkage long sys_getuid(void); -asmlinkage long sys_geteuid(void); -asmlinkage long sys_getgid(void); -asmlinkage long sys_getegid(void); -asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid); -asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid); -asmlinkage long sys_getpgid(pid_t pid); -asmlinkage long sys_getpgrp(void); -asmlinkage long sys_getsid(pid_t pid); -asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist); - -asmlinkage long sys_setregid(gid_t rgid, gid_t egid); -asmlinkage long sys_setgid(gid_t gid); -asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); -asmlinkage long sys_setuid(uid_t uid); -asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, 
uid_t suid); -asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); -asmlinkage long sys_setfsuid(uid_t uid); -asmlinkage long sys_setfsgid(gid_t gid); -asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); -asmlinkage long sys_setsid(void); -asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist); - -asmlinkage long sys_acct(const char __user *name); -asmlinkage long sys_capget(cap_user_header_t header, - cap_user_data_t dataptr); -asmlinkage long sys_capset(cap_user_header_t header, - const cap_user_data_t data); -asmlinkage long sys_personality(unsigned int personality); - -asmlinkage long sys_sigpending(old_sigset_t __user *uset); -asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, - old_sigset_t __user *oset); -asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, - struct sigaltstack __user *uoss); - -asmlinkage long sys_getitimer(int which, struct itimerval __user *value); -asmlinkage long sys_setitimer(int which, - struct itimerval __user *value, - struct itimerval __user *ovalue); -asmlinkage long sys_timer_create(clockid_t which_clock, - struct sigevent __user *timer_event_spec, - timer_t __user * created_timer_id); -asmlinkage long sys_timer_gettime(timer_t timer_id, - struct itimerspec __user *setting); -asmlinkage long sys_timer_getoverrun(timer_t timer_id); -asmlinkage long sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, - struct itimerspec __user *old_setting); -asmlinkage long sys_timer_delete(timer_t timer_id); -asmlinkage long sys_clock_settime(clockid_t which_clock, - const struct timespec __user *tp); -asmlinkage long sys_clock_gettime(clockid_t which_clock, - struct timespec __user *tp); -asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); -asmlinkage long sys_clock_getres(clockid_t which_clock, - struct timespec __user *tp); -asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, - const struct 
timespec __user *rqtp, - struct timespec __user *rmtp); - -asmlinkage long sys_nice(int increment); -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, - struct sched_param __user *param); -asmlinkage long sys_sched_setparam(pid_t pid, - struct sched_param __user *param); -asmlinkage long sys_sched_setattr(pid_t pid, - struct sched_attr __user *attr, - unsigned int flags); -asmlinkage long sys_sched_getscheduler(pid_t pid); -asmlinkage long sys_sched_getparam(pid_t pid, - struct sched_param __user *param); -asmlinkage long sys_sched_getattr(pid_t pid, - struct sched_attr __user *attr, - unsigned int size, - unsigned int flags); -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr); -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr); -asmlinkage long sys_sched_yield(void); -asmlinkage long sys_sched_get_priority_max(int policy); -asmlinkage long sys_sched_get_priority_min(int policy); -asmlinkage long sys_sched_rr_get_interval(pid_t pid, - struct timespec __user *interval); -asmlinkage long sys_setpriority(int which, int who, int niceval); -asmlinkage long sys_getpriority(int which, int who); - -asmlinkage long sys_shutdown(int, int); -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, - void __user *arg); -asmlinkage long sys_restart_syscall(void); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); -asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, - unsigned long cmdline_len, - const char __user *cmdline_ptr, - unsigned long flags); - -asmlinkage long sys_exit(int error_code); -asmlinkage long sys_exit_group(int error_code); -asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, - int options, struct rusage __user *ru); -asmlinkage long sys_waitid(int which, pid_t pid, - struct siginfo __user *infop, - int options, 
struct rusage __user *ru); -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options); -asmlinkage long sys_set_tid_address(int __user *tidptr); -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3); - -asmlinkage long sys_init_module(void __user *umod, unsigned long len, - const char __user *uargs); -asmlinkage long sys_delete_module(const char __user *name_user, - unsigned int flags); - -#ifdef CONFIG_OLD_SIGSUSPEND -asmlinkage long sys_sigsuspend(old_sigset_t mask); -#endif - -#ifdef CONFIG_OLD_SIGSUSPEND3 -asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask); -#endif - -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); - -#ifdef CONFIG_OLD_SIGACTION -asmlinkage long sys_sigaction(int, const struct old_sigaction __user *, - struct old_sigaction __user *); -#endif - -#ifndef CONFIG_ODD_RT_SIGACTION -asmlinkage long sys_rt_sigaction(int, - const struct sigaction __user *, - struct sigaction __user *, - size_t); -#endif -asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set, - sigset_t __user *oset, size_t sigsetsize); -asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize); -asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize); -asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, - siginfo_t __user *uinfo); -asmlinkage long sys_kill(pid_t pid, int sig); -asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); -asmlinkage long sys_tkill(pid_t pid, int sig); -asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); -asmlinkage long sys_sgetmask(void); -asmlinkage long sys_ssetmask(int newmask); -asmlinkage long sys_signal(int sig, __sighandler_t handler); -asmlinkage long sys_pause(void); - -asmlinkage long sys_sync(void); -asmlinkage long 
sys_fsync(unsigned int fd); -asmlinkage long sys_fdatasync(unsigned int fd); -asmlinkage long sys_bdflush(int func, long data); -asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name, - char __user *type, unsigned long flags, - void __user *data); -asmlinkage long sys_umount(char __user *name, int flags); -asmlinkage long sys_oldumount(char __user *name); -asmlinkage long sys_truncate(const char __user *path, long length); -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); -asmlinkage long sys_stat(const char __user *filename, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_statfs(const char __user * path, - struct statfs __user *buf); -asmlinkage long sys_statfs64(const char __user *path, size_t sz, - struct statfs64 __user *buf); -asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); -asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, - struct statfs64 __user *buf); -asmlinkage long sys_lstat(const char __user *filename, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_fstat(unsigned int fd, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_newstat(const char __user *filename, - struct stat __user *statbuf); -asmlinkage long sys_newlstat(const char __user *filename, - struct stat __user *statbuf); -asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); -asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); -#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) -asmlinkage long sys_stat64(const char __user *filename, - struct stat64 __user *statbuf); -asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); -asmlinkage long sys_lstat64(const char __user *filename, - struct stat64 __user *statbuf); -asmlinkage long sys_fstatat64(int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag); -#endif -#if BITS_PER_LONG == 32 -asmlinkage long sys_truncate64(const 
char __user *path, loff_t length); -asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); -#endif +/* + * These syscall function prototypes are kept in the same order as + * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, + * followed by deprecated or obsolete system calls. + * + * Please note that these prototypes here are only provided for information + * purposes, for static analysis, and for linking from the syscall table. + * These functions should not be called elsewhere from kernel code. + */ +asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); +asmlinkage long sys_io_destroy(aio_context_t ctx); +asmlinkage long sys_io_submit(aio_context_t, long, + struct iocb __user * __user *); +asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, + struct io_event __user *result); +asmlinkage long sys_io_getevents(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct timespec __user *timeout); +/* fs/xattr.c */ asmlinkage long sys_setxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, @@ -481,125 +286,126 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); -asmlinkage long sys_brk(unsigned long brk); -asmlinkage long sys_mprotect(unsigned long start, size_t len, - unsigned long prot); -asmlinkage long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); -asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, - unsigned long flags); -asmlinkage long sys_msync(unsigned long start, size_t len, int flags); -asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice); 
-asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); -asmlinkage long sys_munmap(unsigned long addr, size_t len); -asmlinkage long sys_mlock(unsigned long start, size_t len); -asmlinkage long sys_munlock(unsigned long start, size_t len); -asmlinkage long sys_mlockall(int flags); -asmlinkage long sys_munlockall(void); -asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); -asmlinkage long sys_mincore(unsigned long start, size_t len, - unsigned char __user * vec); +/* fs/dcache.c */ +asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_pivot_root(const char __user *new_root, - const char __user *put_old); -asmlinkage long sys_chroot(const char __user *filename); -asmlinkage long sys_mknod(const char __user *filename, umode_t mode, - unsigned dev); -asmlinkage long sys_link(const char __user *oldname, - const char __user *newname); -asmlinkage long sys_symlink(const char __user *old, const char __user *new); -asmlinkage long sys_unlink(const char __user *pathname); -asmlinkage long sys_rename(const char __user *oldname, - const char __user *newname); -asmlinkage long sys_chmod(const char __user *filename, umode_t mode); -asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); +/* fs/cookies.c */ +asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); + +/* fs/eventfd.c */ +asmlinkage long sys_eventfd2(unsigned int count, int flags); +/* fs/eventpoll.c */ +asmlinkage long sys_epoll_create1(int flags); +asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, + struct epoll_event __user *event); +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); + +/* fs/fcntl.c */ +asmlinkage long sys_dup(unsigned int fildes); +asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, 
unsigned long arg); #if BITS_PER_LONG == 32 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif -asmlinkage long sys_pipe(int __user *fildes); -asmlinkage long sys_pipe2(int __user *fildes, int flags); -asmlinkage long sys_dup(unsigned int fildes); -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); -asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); + +/* fs/inotify_user.c */ +asmlinkage long sys_inotify_init1(int flags); +asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, + u32 mask); +asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); + +/* fs/ioctl.c */ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); + +/* fs/ioprio.c */ +asmlinkage long sys_ioprio_set(int which, int who, int ioprio); +asmlinkage long sys_ioprio_get(int which, int who); + +/* fs/locks.c */ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd); -asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); -asmlinkage long sys_io_destroy(aio_context_t ctx); -asmlinkage long sys_io_getevents(aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event __user *events, - struct timespec __user *timeout); -asmlinkage long sys_io_submit(aio_context_t, long, - struct iocb __user * __user *); -asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, - struct io_event __user *result); -asmlinkage long sys_sendfile(int out_fd, int in_fd, - off_t __user *offset, size_t count); -asmlinkage long sys_sendfile64(int out_fd, int in_fd, - loff_t __user *offset, size_t count); -asmlinkage long sys_readlink(const char __user *path, - char __user *buf, int bufsiz); -asmlinkage long sys_creat(const char __user *pathname, umode_t mode); -asmlinkage long sys_open(const char __user *filename, - int flags, umode_t mode); + +/* fs/namei.c */ +asmlinkage long 
sys_mknodat(int dfd, const char __user * filename, umode_t mode, + unsigned dev); +asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); +asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); +asmlinkage long sys_symlinkat(const char __user * oldname, + int newdfd, const char __user * newname); +asmlinkage long sys_linkat(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, int flags); +asmlinkage long sys_renameat(int olddfd, const char __user * oldname, + int newdfd, const char __user * newname); + +/* fs/namespace.c */ +asmlinkage long sys_umount(char __user *name, int flags); +asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name, + char __user *type, unsigned long flags, + void __user *data); +asmlinkage long sys_pivot_root(const char __user *new_root, + const char __user *put_old); + +/* fs/nfsctl.c */ + +/* fs/open.c */ +asmlinkage long sys_statfs(const char __user * path, + struct statfs __user *buf); +asmlinkage long sys_statfs64(const char __user *path, size_t sz, + struct statfs64 __user *buf); +asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); +asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, + struct statfs64 __user *buf); +asmlinkage long sys_truncate(const char __user *path, long length); +asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); +#if BITS_PER_LONG == 32 +asmlinkage long sys_truncate64(const char __user *path, loff_t length); +asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); +#endif +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); +asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); +asmlinkage long sys_chdir(const char __user *filename); +asmlinkage long sys_fchdir(unsigned int fd); +asmlinkage long sys_chroot(const char __user *filename); +asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); +asmlinkage long 
sys_fchmodat(int dfd, const char __user * filename, + umode_t mode); +asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag); +asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); +asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, + umode_t mode); asmlinkage long sys_close(unsigned int fd); -asmlinkage long sys_access(const char __user *filename, int mode); asmlinkage long sys_vhangup(void); -asmlinkage long sys_chown(const char __user *filename, - uid_t user, gid_t group); -asmlinkage long sys_lchown(const char __user *filename, - uid_t user, gid_t group); -asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_HAVE_UID16 -asmlinkage long sys_chown16(const char __user *filename, - old_uid_t user, old_gid_t group); -asmlinkage long sys_lchown16(const char __user *filename, - old_uid_t user, old_gid_t group); -asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group); -asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid); -asmlinkage long sys_setgid16(old_gid_t gid); -asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid); -asmlinkage long sys_setuid16(old_uid_t uid); -asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid); -asmlinkage long sys_getresuid16(old_uid_t __user *ruid, - old_uid_t __user *euid, old_uid_t __user *suid); -asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid); -asmlinkage long sys_getresgid16(old_gid_t __user *rgid, - old_gid_t __user *egid, old_gid_t __user *sgid); -asmlinkage long sys_setfsuid16(old_uid_t uid); -asmlinkage long sys_setfsgid16(old_gid_t gid); -asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist); -asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist); -asmlinkage long sys_getuid16(void); -asmlinkage long sys_geteuid16(void); -asmlinkage long sys_getgid16(void); 
-asmlinkage long sys_getegid16(void); -#endif -asmlinkage long sys_utime(char __user *filename, - struct utimbuf __user *times); -asmlinkage long sys_utimes(char __user *filename, - struct timeval __user *utimes); -asmlinkage long sys_lseek(unsigned int fd, off_t offset, - unsigned int whence); +/* fs/pipe.c */ +asmlinkage long sys_pipe2(int __user *fildes, int flags); + +/* fs/quota.c */ +asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr); + +/* fs/readdir.c */ +asmlinkage long sys_getdents64(unsigned int fd, + struct linux_dirent64 __user *dirent, + unsigned int count); + +/* fs/read_write.c */ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int whence); +asmlinkage long sys_lseek(unsigned int fd, off_t offset, + unsigned int whence); asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); -asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); +asmlinkage long sys_write(unsigned int fd, const char __user *buf, + size_t count); asmlinkage long sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); -asmlinkage long sys_write(unsigned int fd, const char __user *buf, - size_t count); asmlinkage long sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); @@ -609,264 +415,314 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); -asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - rwf_t flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); -asmlinkage long sys_pwritev2(unsigned long 
fd, const struct iovec __user *vec, - unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - rwf_t flags); -asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); -asmlinkage long sys_chdir(const char __user *filename); -asmlinkage long sys_fchdir(unsigned int fd); -asmlinkage long sys_rmdir(const char __user *pathname); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); -asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr); -asmlinkage long sys_getdents(unsigned int fd, - struct linux_dirent __user *dirent, - unsigned int count); -asmlinkage long sys_getdents64(unsigned int fd, - struct linux_dirent64 __user *dirent, - unsigned int count); -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen); -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen); -asmlinkage long sys_bind(int, struct sockaddr __user *, int); -asmlinkage long sys_connect(int, struct sockaddr __user *, int); -asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); -asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_send(int, void __user *, size_t, unsigned); -asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, - struct sockaddr __user *, int); -asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, - unsigned int vlen, unsigned flags); -asmlinkage long sys_recv(int, void __user *, size_t, unsigned); -asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, - struct sockaddr __user *, int __user 
*); -asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, - unsigned int vlen, unsigned flags, - struct timespec __user *timeout); -asmlinkage long sys_socket(int, int, int); -asmlinkage long sys_socketpair(int, int, int, int __user *); -asmlinkage long sys_socketcall(int call, unsigned long __user *args); -asmlinkage long sys_listen(int, int); -asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, - int timeout); -asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timeval __user *tvp); -asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); -asmlinkage long sys_epoll_create(int size); -asmlinkage long sys_epoll_create1(int flags); -asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, - struct epoll_event __user *event); -asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout); -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, - const sigset_t __user *sigmask, +/* fs/sendfile.c */ +asmlinkage long sys_sendfile64(int out_fd, int in_fd, + loff_t __user *offset, size_t count); + +/* fs/select.c */ +asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct timespec __user *, + void __user *); +asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, + struct timespec __user *, const sigset_t __user *, + size_t); + +/* fs/signalfd.c */ +asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags); + +/* fs/splice.c */ +asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags); +asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, + int fd_out, loff_t __user *off_out, + size_t len, unsigned int flags); +asmlinkage long sys_tee(int fdin, int 
fdout, size_t len, unsigned int flags); + +/* fs/stat.c */ +asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, + int bufsiz); +asmlinkage long sys_newfstatat(int dfd, const char __user *filename, + struct stat __user *statbuf, int flag); +asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) +asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); +asmlinkage long sys_fstatat64(int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag); +#endif + +/* fs/sync.c */ +asmlinkage long sys_sync(void); +asmlinkage long sys_fsync(unsigned int fd); +asmlinkage long sys_fdatasync(unsigned int fd); +asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, + loff_t offset, loff_t nbytes); +asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags); + +/* fs/timerfd.c */ +asmlinkage long sys_timerfd_create(int clockid, int flags); +asmlinkage long sys_timerfd_settime(int ufd, int flags, + const struct itimerspec __user *utmr, + struct itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); + +/* fs/utimes.c */ +asmlinkage long sys_utimensat(int dfd, const char __user *filename, + struct timespec __user *utimes, int flags); + +/* kernel/acct.c */ +asmlinkage long sys_acct(const char __user *name); + +/* kernel/capability.c */ +asmlinkage long sys_capget(cap_user_header_t header, + cap_user_data_t dataptr); +asmlinkage long sys_capset(cap_user_header_t header, + const cap_user_data_t data); + +/* kernel/exec_domain.c */ +asmlinkage long sys_personality(unsigned int personality); + +/* kernel/exit.c */ +asmlinkage long sys_exit(int error_code); +asmlinkage long sys_exit_group(int error_code); +asmlinkage long sys_waitid(int which, pid_t pid, + struct siginfo __user *infop, + int options, struct rusage __user *ru); + +/* 
kernel/fork.c */ +asmlinkage long sys_set_tid_address(int __user *tidptr); +asmlinkage long sys_unshare(unsigned long unshare_flags); + +/* kernel/futex.c */ +asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, + struct timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long sys_get_robust_list(int pid, + struct robust_list_head __user * __user *head_ptr, + size_t __user *len_ptr); +asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, + size_t len); + +/* kernel/hrtimer.c */ +asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); + +/* kernel/itimer.c */ +asmlinkage long sys_getitimer(int which, struct itimerval __user *value); +asmlinkage long sys_setitimer(int which, + struct itimerval __user *value, + struct itimerval __user *ovalue); + +/* kernel/kexec.c */ +asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); + +/* kernel/module.c */ +asmlinkage long sys_init_module(void __user *umod, unsigned long len, + const char __user *uargs); +asmlinkage long sys_delete_module(const char __user *name_user, + unsigned int flags); + +/* kernel/posix-timers.c */ +asmlinkage long sys_timer_create(clockid_t which_clock, + struct sigevent __user *timer_event_spec, + timer_t __user * created_timer_id); +asmlinkage long sys_timer_gettime(timer_t timer_id, + struct itimerspec __user *setting); +asmlinkage long sys_timer_getoverrun(timer_t timer_id); +asmlinkage long sys_timer_settime(timer_t timer_id, int flags, + const struct itimerspec __user *new_setting, + struct itimerspec __user *old_setting); +asmlinkage long sys_timer_delete(timer_t timer_id); +asmlinkage long sys_clock_settime(clockid_t which_clock, + const struct timespec __user *tp); +asmlinkage long sys_clock_gettime(clockid_t which_clock, + struct timespec __user *tp); +asmlinkage long sys_clock_getres(clockid_t which_clock, + struct timespec 
__user *tp); +asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, + const struct timespec __user *rqtp, + struct timespec __user *rmtp); + +/* kernel/printk.c */ +asmlinkage long sys_syslog(int type, char __user *buf, int len); + +/* kernel/ptrace.c */ +asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, + unsigned long data); +/* kernel/sched/core.c */ + +asmlinkage long sys_sched_setparam(pid_t pid, + struct sched_param __user *param); +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, + struct sched_param __user *param); +asmlinkage long sys_sched_getscheduler(pid_t pid); +asmlinkage long sys_sched_getparam(pid_t pid, + struct sched_param __user *param); +asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, + unsigned long __user *user_mask_ptr); +asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, + unsigned long __user *user_mask_ptr); +asmlinkage long sys_sched_yield(void); +asmlinkage long sys_sched_get_priority_max(int policy); +asmlinkage long sys_sched_get_priority_min(int policy); +asmlinkage long sys_sched_rr_get_interval(pid_t pid, + struct timespec __user *interval); + +/* kernel/signal.c */ +asmlinkage long sys_restart_syscall(void); +asmlinkage long sys_kill(pid_t pid, int sig); +asmlinkage long sys_tkill(pid_t pid, int sig); +asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); +asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, + struct sigaltstack __user *uoss); +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +#ifndef CONFIG_ODD_RT_SIGACTION +asmlinkage long sys_rt_sigaction(int, + const struct sigaction __user *, + struct sigaction __user *, + size_t); +#endif +asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set, + sigset_t __user *oset, size_t sigsetsize); +asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize); +asmlinkage long sys_rt_sigtimedwait(const sigset_t __user 
*uthese, + siginfo_t __user *uinfo, + const struct timespec __user *uts, size_t sigsetsize); -asmlinkage long sys_gethostname(char __user *name, int len); +asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); + +/* kernel/sys.c */ +asmlinkage long sys_setpriority(int which, int who, int niceval); +asmlinkage long sys_getpriority(int which, int who); +asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, + void __user *arg); +asmlinkage long sys_setregid(gid_t rgid, gid_t egid); +asmlinkage long sys_setgid(gid_t gid); +asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); +asmlinkage long sys_setuid(uid_t uid); +asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); +asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid); +asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); +asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid); +asmlinkage long sys_setfsuid(uid_t uid); +asmlinkage long sys_setfsgid(gid_t gid); +asmlinkage long sys_times(struct tms __user *tbuf); +asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); +asmlinkage long sys_getpgid(pid_t pid); +asmlinkage long sys_getsid(pid_t pid); +asmlinkage long sys_setsid(void); +asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist); +asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist); +asmlinkage long sys_newuname(struct new_utsname __user *name); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); -asmlinkage long sys_newuname(struct new_utsname __user *name); -asmlinkage long sys_uname(struct old_utsname __user *); -asmlinkage long sys_olduname(struct oldold_utsname __user *); - asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); -#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT -asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit 
__user *rlim); -#endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); -asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, - const struct rlimit64 __user *new_rlim, - struct rlimit64 __user *old_rlim); asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); +asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); +asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); + +/* kernel/time.c */ +asmlinkage long sys_gettimeofday(struct timeval __user *tv, + struct timezone __user *tz); +asmlinkage long sys_settimeofday(struct timeval __user *tv, + struct timezone __user *tz); +asmlinkage long sys_adjtimex(struct timex __user *txc_p); + +/* kernel/timer.c */ +asmlinkage long sys_getpid(void); +asmlinkage long sys_getppid(void); +asmlinkage long sys_getuid(void); +asmlinkage long sys_geteuid(void); +asmlinkage long sys_getgid(void); +asmlinkage long sys_getegid(void); +asmlinkage long sys_gettid(void); +asmlinkage long sys_sysinfo(struct sysinfo __user *info); + +/* ipc/mqueue.c */ +asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); +asmlinkage long sys_mq_unlink(const char __user *name); +asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); +asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +/* ipc/msg.c */ asmlinkage long sys_msgget(key_t key, int msgflg); -asmlinkage long sys_msgsnd(int 
msqid, struct msgbuf __user *msgp, - size_t msgsz, int msgflg); +asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); -asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, + size_t msgsz, int msgflg); +/* ipc/sem.c */ asmlinkage long sys_semget(key_t key, int nsems, int semflg); -asmlinkage long sys_semop(int semid, struct sembuf __user *sops, - unsigned nsops); asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct timespec __user *timeout); -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); +asmlinkage long sys_semop(int semid, struct sembuf __user *sops, + unsigned nsops); + +/* ipc/shm.c */ asmlinkage long sys_shmget(key_t key, size_t size, int flag); -asmlinkage long sys_shmdt(char __user *shmaddr); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); -asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, - unsigned long third, void __user *ptr, long fifth); +asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); +asmlinkage long sys_shmdt(char __user *shmaddr); -asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); -asmlinkage long sys_mq_unlink(const char __user *name); -asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); -asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); -asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); -asmlinkage 
long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +/* net/socket.c */ +asmlinkage long sys_socket(int, int, int); +asmlinkage long sys_socketpair(int, int, int, int __user *); +asmlinkage long sys_bind(int, struct sockaddr __user *, int); +asmlinkage long sys_listen(int, int); +asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_connect(int, struct sockaddr __user *, int); +asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, + struct sockaddr __user *, int); +asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, + struct sockaddr __user *, int __user *); +asmlinkage long sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen); +asmlinkage long sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long sys_shutdown(int, int); +asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn); -asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf); -asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf); +/* mm/filemap.c */ +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); -asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); -asmlinkage long sys_swapon(const char __user *specialfile, int swap_flags); -asmlinkage long sys_swapoff(const char __user *specialfile); -asmlinkage long 
sys_sysctl(struct __sysctl_args __user *args); -asmlinkage long sys_sysinfo(struct sysinfo __user *info); -asmlinkage long sys_sysfs(int option, - unsigned long arg1, unsigned long arg2); -asmlinkage long sys_syslog(int type, char __user *buf, int len); -asmlinkage long sys_uselib(const char __user *library); -asmlinkage long sys_ni_syscall(void); -asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, - unsigned long data); +/* mm/nommu.c, also with MMU */ +asmlinkage long sys_brk(unsigned long brk); +asmlinkage long sys_munmap(unsigned long addr, size_t len); +asmlinkage long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); +/* security/keys/keyctl.c */ asmlinkage long sys_add_key(const char __user *_type, const char __user *_description, const void __user *_payload, size_t plen, key_serial_t destringid); - asmlinkage long sys_request_key(const char __user *_type, const char __user *_description, const char __user *_callout_info, key_serial_t destringid); - asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -asmlinkage long sys_ioprio_set(int which, int who, int ioprio); -asmlinkage long sys_ioprio_get(int which, int who); -asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, - unsigned long maxnode); -asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, - const unsigned long __user *from, - const unsigned long __user *to); -asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, - const void __user * __user *pages, - const int __user *nodes, - int __user *status, - int flags); -asmlinkage long sys_mbind(unsigned long start, unsigned long len, - unsigned long mode, - const unsigned long __user *nmask, - unsigned long maxnode, - unsigned flags); -asmlinkage long sys_get_mempolicy(int __user *policy, - unsigned long __user *nmask, - unsigned long maxnode, - 
unsigned long addr, unsigned long flags); - -asmlinkage long sys_inotify_init(void); -asmlinkage long sys_inotify_init1(int flags); -asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, - u32 mask); -asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); - -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, - __u32 __user *ustatus); -asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, umode_t mode, int fd); - -asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, - unsigned dev); -asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); -asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); -asmlinkage long sys_symlinkat(const char __user * oldname, - int newdfd, const char __user * newname); -asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname, int flags); -asmlinkage long sys_renameat(int olddfd, const char __user * oldname, - int newdfd, const char __user * newname); -asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname, - unsigned int flags); -asmlinkage long sys_futimesat(int dfd, const char __user *filename, - struct timeval __user *utimes); -asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); -asmlinkage long sys_fchmodat(int dfd, const char __user * filename, - umode_t mode); -asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, - gid_t group, int flag); -asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, - umode_t mode); -asmlinkage long sys_newfstatat(int dfd, const char __user *filename, - struct stat __user *statbuf, int flag); -asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, - int bufsiz); -asmlinkage long sys_utimensat(int dfd, const char __user *filename, - struct timespec __user *utimes, int flags); 
-asmlinkage long sys_unshare(unsigned long unshare_flags); - -asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, - int fd_out, loff_t __user *off_out, - size_t len, unsigned int flags); - -asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags); - -asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); - -asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, - unsigned int flags); -asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, - loff_t offset, loff_t nbytes); -asmlinkage long sys_get_robust_list(int pid, - struct robust_list_head __user * __user *head_ptr, - size_t __user *len_ptr); -asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, - size_t len); -asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); -asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); -asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags); -asmlinkage long sys_timerfd_create(int clockid, int flags); -asmlinkage long sys_timerfd_settime(int ufd, int flags, - const struct itimerspec __user *utmr, - struct itimerspec __user *otmr); -asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); -asmlinkage long sys_eventfd(unsigned int count); -asmlinkage long sys_eventfd2(unsigned int count, int flags); -asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_userfaultfd(int flags); -asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); -asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); -asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, - fd_set __user *, struct timespec __user *, - void __user *); -asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, - struct 
timespec __user *, const sigset_t __user *, - size_t); -asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); -asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, - u64 mask, int fd, - const char __user *pathname); -asmlinkage long sys_syncfs(int fd); - -asmlinkage long sys_fork(void); -asmlinkage long sys_vfork(void); +/* arch/example/kernel/sys_example.c */ #ifdef CONFIG_CLONE_BACKWARDS asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); @@ -879,26 +735,80 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, unsigned long); #endif #endif - asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp); +/* mm/fadvise.c */ +asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); + +/* mm/, CONFIG_MMU only */ +asmlinkage long sys_swapon(const char __user *specialfile, int swap_flags); +asmlinkage long sys_swapoff(const char __user *specialfile); +asmlinkage long sys_mprotect(unsigned long start, size_t len, + unsigned long prot); +asmlinkage long sys_msync(unsigned long start, size_t len, int flags); +asmlinkage long sys_mlock(unsigned long start, size_t len); +asmlinkage long sys_munlock(unsigned long start, size_t len); +asmlinkage long sys_mlockall(int flags); +asmlinkage long sys_munlockall(void); +asmlinkage long sys_mincore(unsigned long start, size_t len, + unsigned char __user * vec); +asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, + unsigned long prot, unsigned long pgoff, + unsigned long flags); +asmlinkage long sys_mbind(unsigned long start, unsigned long len, + unsigned long mode, + const unsigned long __user *nmask, + unsigned long maxnode, + unsigned flags); +asmlinkage long sys_get_mempolicy(int __user *policy, + unsigned 
long __user *nmask, + unsigned long maxnode, + unsigned long addr, unsigned long flags); +asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, + unsigned long maxnode); +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *from, + const unsigned long __user *to); +asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, + int flags); + +asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t __user *uinfo); asmlinkage long sys_perf_event_open( struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); +asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags, + struct timespec __user *timeout); -asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); -asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); +asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, + int options, struct rusage __user *ru); +asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, + const struct rlimit64 __user *new_rlim, + struct rlimit64 __user *old_rlim); +asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); +asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, + u64 mask, int fd, + const char __user *pathname); asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long sys_clock_adjtime(clockid_t which_clock, + struct timex __user *tx); +asmlinkage long sys_syncfs(int fd); 
asmlinkage long sys_setns(int fd, int nstype); +asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags); asmlinkage long sys_process_vm_readv(pid_t pid, const struct iovec __user *lvec, unsigned long liovcnt, @@ -911,39 +821,259 @@ asmlinkage long sys_process_vm_writev(pid_t pid, const struct iovec __user *rvec, unsigned long riovcnt, unsigned long flags); - asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_sched_setattr(pid_t pid, + struct sched_attr __user *attr, + unsigned int flags); +asmlinkage long sys_sched_getattr(pid_t pid, + struct sched_attr __user *attr, + unsigned int size, + unsigned int flags); +asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, + unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, const char __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); +asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); - asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); - +asmlinkage long sys_userfaultfd(int flags); asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags); - -asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); - +asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + rwf_t flags); 
+asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + rwf_t flags); asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); -asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); +/* + * Architecture-specific system calls + */ + +/* arch/x86/kernel/ioport.c */ +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); + +/* pciconfig: alpha, arm, arm64, ia64, sparc */ +asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, + void __user *buf); +asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, + void __user *buf); +asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn); + +/* powerpc */ +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, + __u32 __user *ustatus); +asmlinkage long sys_spu_create(const char __user *name, + unsigned int flags, umode_t mode, int fd); + + +/* + * Deprecated system calls which are still defined in + * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch + */ + +/* __ARCH_WANT_SYSCALL_NO_AT */ +asmlinkage long sys_open(const char __user *filename, + int flags, umode_t mode); +asmlinkage long sys_link(const char __user *oldname, + const char __user *newname); +asmlinkage long sys_unlink(const char __user *pathname); +asmlinkage long sys_mknod(const char __user *filename, umode_t mode, + unsigned dev); +asmlinkage long sys_chmod(const char __user *filename, umode_t mode); +asmlinkage long sys_chown(const 
char __user *filename, + uid_t user, gid_t group); +asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); +asmlinkage long sys_rmdir(const char __user *pathname); +asmlinkage long sys_lchown(const char __user *filename, + uid_t user, gid_t group); +asmlinkage long sys_access(const char __user *filename, int mode); +asmlinkage long sys_rename(const char __user *oldname, + const char __user *newname); +asmlinkage long sys_symlink(const char __user *old, const char __user *new); +asmlinkage long sys_utimes(char __user *filename, + struct timeval __user *utimes); +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) +asmlinkage long sys_stat64(const char __user *filename, + struct stat64 __user *statbuf); +asmlinkage long sys_lstat64(const char __user *filename, + struct stat64 __user *statbuf); +#endif + +/* __ARCH_WANT_SYSCALL_NO_FLAGS */ +asmlinkage long sys_pipe(int __user *fildes); +asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); +asmlinkage long sys_epoll_create(int size); +asmlinkage long sys_inotify_init(void); +asmlinkage long sys_eventfd(unsigned int count); +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); + +/* __ARCH_WANT_SYSCALL_OFF_T */ +asmlinkage long sys_sendfile(int out_fd, int in_fd, + off_t __user *offset, size_t count); +asmlinkage long sys_newstat(const char __user *filename, + struct stat __user *statbuf); +asmlinkage long sys_newlstat(const char __user *filename, + struct stat __user *statbuf); +asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice); + +/* __ARCH_WANT_SYSCALL_DEPRECATED */ +asmlinkage long sys_alarm(unsigned int seconds); +asmlinkage long sys_getpgrp(void); +asmlinkage long sys_pause(void); +asmlinkage long sys_time(time_t __user *tloc); +asmlinkage long sys_utime(char __user *filename, + struct utimbuf __user *times); +asmlinkage long sys_creat(const char __user *pathname, umode_t mode); +asmlinkage long 
sys_getdents(unsigned int fd, + struct linux_dirent __user *dirent, + unsigned int count); +asmlinkage long sys_futimesat(int dfd, const char __user *filename, + struct timeval __user *utimes); +asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct timeval __user *tvp); +asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, + int timeout); +asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout); +asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); +asmlinkage long sys_vfork(void); +asmlinkage long sys_recv(int, void __user *, size_t, unsigned); +asmlinkage long sys_send(int, void __user *, size_t, unsigned); +asmlinkage long sys_bdflush(int func, long data); +asmlinkage long sys_oldumount(char __user *name); +asmlinkage long sys_uselib(const char __user *library); +asmlinkage long sys_sysctl(struct __sysctl_args __user *args); +asmlinkage long sys_sysfs(int option, + unsigned long arg1, unsigned long arg2); +asmlinkage long sys_fork(void); + +/* obsolete: kernel/time/time.c */ +asmlinkage long sys_stime(time_t __user *tptr); + +/* obsolete: kernel/signal.c */ +asmlinkage long sys_sigpending(old_sigset_t __user *uset); +asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, + old_sigset_t __user *oset); +#ifdef CONFIG_OLD_SIGSUSPEND +asmlinkage long sys_sigsuspend(old_sigset_t mask); +#endif + +#ifdef CONFIG_OLD_SIGSUSPEND3 +asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask); +#endif + +#ifdef CONFIG_OLD_SIGACTION +asmlinkage long sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +#endif +asmlinkage long sys_sgetmask(void); +asmlinkage long sys_ssetmask(int newmask); +asmlinkage long sys_signal(int sig, __sighandler_t handler); + +/* obsolete: kernel/sched/core.c */ +asmlinkage long sys_nice(int increment); + +/* obsolete: kernel/kexec_file.c */ +asmlinkage 
long sys_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, + const char __user *cmdline_ptr, + unsigned long flags); + +/* obsolete: kernel/exit.c */ +asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options); + +/* obsolete: kernel/uid16.c */ +#ifdef CONFIG_HAVE_UID16 +asmlinkage long sys_chown16(const char __user *filename, + old_uid_t user, old_gid_t group); +asmlinkage long sys_lchown16(const char __user *filename, + old_uid_t user, old_gid_t group); +asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group); +asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid); +asmlinkage long sys_setgid16(old_gid_t gid); +asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid); +asmlinkage long sys_setuid16(old_uid_t uid); +asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid); +asmlinkage long sys_getresuid16(old_uid_t __user *ruid, + old_uid_t __user *euid, old_uid_t __user *suid); +asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid); +asmlinkage long sys_getresgid16(old_gid_t __user *rgid, + old_gid_t __user *egid, old_gid_t __user *sgid); +asmlinkage long sys_setfsuid16(old_uid_t uid); +asmlinkage long sys_setfsgid16(old_gid_t gid); +asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist); +asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist); +asmlinkage long sys_getuid16(void); +asmlinkage long sys_geteuid16(void); +asmlinkage long sys_getgid16(void); +asmlinkage long sys_getegid16(void); +#endif + +/* obsolete: net/socket.c */ +asmlinkage long sys_socketcall(int call, unsigned long __user *args); + +/* obsolete: fs/stat.c */ +asmlinkage long sys_stat(const char __user *filename, + struct __old_kernel_stat __user *statbuf); +asmlinkage long sys_lstat(const char __user *filename, + struct __old_kernel_stat __user *statbuf); +asmlinkage long sys_fstat(unsigned int fd, + struct __old_kernel_stat 
__user *statbuf); +asmlinkage long sys_readlink(const char __user *path, + char __user *buf, int bufsiz); + +/* obsolete: fs/select.c */ +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); + +/* obsolete: fs/readdir.c */ +asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); + +/* obsolete: kernel/sys.c */ +asmlinkage long sys_gethostname(char __user *name, int len); +asmlinkage long sys_uname(struct old_utsname __user *); +asmlinkage long sys_olduname(struct oldold_utsname __user *); +#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT +asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); +#endif + +/* obsolete: ipc */ +asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user *ptr, long fifth); + +/* obsolete: mm/ */ +asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); + + +/* + * Not a real system call, but a placeholder for syscalls which are + * not implemented -- see kernel/sys_ni.c + */ +asmlinkage long sys_ni_syscall(void); + + /* * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. * Instead, use one of the functions which work equivalently, such as -- cgit v1.2.3 From 0621150d4a4e17ca6c104ec6e6bcfe6127b33c90 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 25 Mar 2018 20:18:18 +0200 Subject: net: remove compat_sys_*() prototypes from net/compat.h As the syscall functions should only be called from the system call table but not from elsewhere in the kernel, it is sufficient that they are defined in linux/compat.h. Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/net/compat.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/net/compat.h b/include/net/compat.h index a91bea80b9fc..4c6d75612b6c 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -44,17 +44,6 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); -asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, - unsigned int); -asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, - unsigned int, unsigned int); -asmlinkage long compat_sys_recvmsg(int, struct compat_msghdr __user *, - unsigned int); -asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, - unsigned int, unsigned int, - struct compat_timespec __user *); -asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, - int __user *); int put_cmsg_compat(struct msghdr*, int, int, int, void *); int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, -- cgit v1.2.3 From c679a08983db7c1eb09930570b92ff7c9fd59c1c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 25 Mar 2018 23:04:48 +0200 Subject: syscalls: sort syscall prototypes in include/linux/compat.h Shuffle the syscall prototypes in include/linux/compat.h around so that they are kept in the same order as in include/uapi/asm-generic/unistd.h. The individual entries are kept the same, and neither modified to bring them in line with kernel coding style nor wrapped in proper ifdefs -- as an exception to this, add the prefix "asmlinkage" where it was missing. 
Cc: Arnd Bergmann Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 655 ++++++++++++++++++++++++++++--------------------- 1 file changed, 378 insertions(+), 277 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index f1649a5e6716..f881cce627f6 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -305,10 +305,6 @@ extern int put_compat_rusage(const struct rusage *, struct compat_siginfo; -extern asmlinkage long compat_sys_waitid(int, compat_pid_t, - struct compat_siginfo __user *, int, - struct compat_rusage __user *); - struct compat_dirent { u32 d_ino; compat_off_t d_off; @@ -422,90 +418,6 @@ struct compat_msgbuf; extern void compat_exit_robust_list(struct task_struct *curr); -asmlinkage long -compat_sys_set_robust_list(struct compat_robust_list_head __user *head, - compat_size_t len); -asmlinkage long -compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, - compat_size_t __user *len_ptr); - -asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); -asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); -asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); -asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, int msgflg); -asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); -long compat_sys_msgctl(int first, int second, void __user *uptr); -long compat_sys_shmctl(int first, int second, void __user *uptr); -long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsems, const struct compat_timespec __user *timeout); -asmlinkage long compat_sys_keyctl(u32 option, - u32 arg2, u32 arg3, u32 arg4, u32 arg5); -asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); - -asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, - const struct compat_iovec __user *vec, 
compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, - const struct compat_iovec __user *vec, compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high); -asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high); -asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); -asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); - -asmlinkage long compat_sys_quotactl32(unsigned int cmd, - const char __user *special, qid_t id, void __user *addr); - -#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 -asmlinkage long compat_sys_preadv64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 -asmlinkage long compat_sys_pwritev64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 -asmlinkage long compat_sys_readv64v2(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 -asmlinkage long compat_sys_pwritev64v2(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags); -#endif - -asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); - -asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); -asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, - const compat_uptr_t __user *argv, - const compat_uptr_t 
__user *envp, int flags); - -asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timeval __user *tvp); - -asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); - -asmlinkage long compat_sys_wait4(compat_pid_t pid, - compat_uint_t __user *stat_addr, int options, - struct compat_rusage __user *ru); - #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) @@ -518,13 +430,6 @@ int copy_siginfo_from_user32(siginfo_t *to, const struct compat_siginfo __user * int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); -long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, - struct compat_siginfo __user *uinfo); -#ifdef CONFIG_COMPAT_OLD_SIGACTION -asmlinkage long compat_sys_sigaction(int sig, - const struct compat_old_sigaction __user *act, - struct compat_old_sigaction __user *oact); -#endif static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) @@ -546,13 +451,6 @@ static inline int compat_timespec_compare(struct compat_timespec *lhs, return lhs->tv_nsec - rhs->tv_nsec; } -asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); -asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); - -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); - extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* @@ -578,110 +476,132 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } -asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, - compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, - const compat_ulong_t __user 
*new_nodes); - extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); +struct epoll_event; /* fortunately, this one is fixed-layout */ + +extern ssize_t compat_rw_copy_check_uvector(int type, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, + unsigned long fast_segs, struct iovec *fast_pointer, + struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + +int compat_restore_altstack(const compat_stack_t __user *uss); +int __compat_save_altstack(compat_stack_t __user *, unsigned long); +#define compat_save_altstack_ex(uss, sp) do { \ + compat_stack_t __user *__uss = uss; \ + struct task_struct *t = current; \ + put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ + put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ +} while (0); + /* - * epoll (fs/eventpoll.c) compat bits follow ... + * These syscall function prototypes are kept in the same order as + * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, + * followed by deprecated or obsolete system calls. + * + * Please note that these prototypes here are only provided for information + * purposes, for static analysis, and for linking from the syscall table. + * These functions should not be called elsewhere from kernel code. 
*/ -struct epoll_event; /* fortunately, this one is fixed-layout */ +asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); +asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, + u32 __user *iocb); +asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, + struct io_event __user *events, + struct compat_timespec __user *timeout); + +/* fs/cookies.c */ +asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); + +/* fs/eventpoll.c */ asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -asmlinkage long compat_sys_utime(const char __user *filename, - struct compat_utimbuf __user *t); -asmlinkage long compat_sys_utimensat(unsigned int dfd, - const char __user *filename, - struct compat_timespec __user *t, - int flags); +/* fs/fcntl.c */ +asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); +asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); -asmlinkage long compat_sys_time(compat_time_t __user *tloc); -asmlinkage long compat_sys_stime(compat_time_t __user *tptr); -asmlinkage long compat_sys_signalfd(int ufd, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize); -asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, - const struct compat_itimerspec __user *utmr, - struct compat_itimerspec __user *otmr); -asmlinkage long compat_sys_timerfd_gettime(int ufd, - struct compat_itimerspec __user *otmr); +/* fs/ioctl.c */ +asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); -asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, - __u32 __user *pages, - const int __user *nodes, - int __user *status, - int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, - 
const char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_utimes(const char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_newstat(const char __user *filename, - struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newlstat(const char __user *filename, - struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, - const char __user *filename, - struct compat_stat __user *statbuf, - int flag); -asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user *statbuf); +/* fs/namespace.c */ +asmlinkage long compat_sys_mount(const char __user *dev_name, + const char __user *dir_name, + const char __user *type, compat_ulong_t flags, + const void __user *data); + +/* fs/open.c */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); -asmlinkage long compat_sys_fstatfs(unsigned int fd, - struct compat_statfs __user *buf); asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fstatfs(unsigned int fd, + struct compat_statfs __user *buf); asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); -asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); -asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, - compat_long_t min_nr, - compat_long_t nr, - struct io_event __user *events, - struct compat_timespec __user *timeout); -asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, - u32 __user *iocb); -asmlinkage long compat_sys_mount(const char __user *dev_name, - const char __user *dir_name, - const char __user *type, 
compat_ulong_t flags, - const void __user *data); -asmlinkage long compat_sys_old_readdir(unsigned int fd, - struct compat_old_linux_dirent __user *, - unsigned int count); +asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); +/* No generic prototype for truncate64, ftruncate64, fallocate */ +asmlinkage long compat_sys_openat(int dfd, const char __user *filename, + int flags, umode_t mode); + +/* fs/readdir.c */ asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); -asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, - unsigned int nr_segs, unsigned int flags); -asmlinkage long compat_sys_open(const char __user *filename, int flags, - umode_t mode); -asmlinkage long compat_sys_openat(int dfd, const char __user *filename, - int flags, umode_t mode); -asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, - struct file_handle __user *handle, - int flags); -asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); -asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); + +/* fs/read_write.c */ +asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); +asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, + const struct compat_iovec __user *vec, compat_ulong_t vlen); +asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, + const struct compat_iovec __user *vec, compat_ulong_t vlen); +/* No generic prototype for pread64 and pwrite64 */ +asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high); +asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high); +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +asmlinkage long compat_sys_preadv64(unsigned long fd, + const struct 
compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +asmlinkage long compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +/* fs/sendfile.c */ +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count); +asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, + compat_loff_t __user *offset, compat_size_t count); + +/* fs/select.c */ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, @@ -692,110 +612,149 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); + +/* fs/signalfd.c */ asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); -asmlinkage long compat_sys_get_mempolicy(int __user *policy, - compat_ulong_t __user *nmask, - compat_ulong_t maxnode, - compat_ulong_t addr, - compat_ulong_t flags); -asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, - compat_ulong_t maxnode); -asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, - compat_ulong_t mode, - compat_ulong_t __user *nmask, - compat_ulong_t maxnode, compat_ulong_t flags); -asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen); -asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, - unsigned flags); -asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned vlen, unsigned int flags); -asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, - unsigned int flags); -asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, - unsigned flags); -asmlinkage long 
compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, - unsigned flags, struct sockaddr __user *addr, - int __user *addrlen); -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned vlen, unsigned int flags, - struct compat_timespec __user *timeout); +/* fs/splice.c */ +asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, + unsigned int nr_segs, unsigned int flags); + +/* fs/stat.c */ +asmlinkage long compat_sys_newfstatat(unsigned int dfd, + const char __user *filename, + struct compat_stat __user *statbuf, + int flag); +asmlinkage long compat_sys_newfstat(unsigned int fd, + struct compat_stat __user *statbuf); + +/* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */ + +/* fs/timerfd.c */ +asmlinkage long compat_sys_timerfd_gettime(int ufd, + struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, + const struct compat_itimerspec __user *utmr, + struct compat_itimerspec __user *otmr); + +/* fs/utimes.c */ +asmlinkage long compat_sys_utimensat(unsigned int dfd, + const char __user *filename, + struct compat_timespec __user *t, + int flags); + +/* kernel/exit.c */ +asmlinkage long compat_sys_waitid(int, compat_pid_t, + struct compat_siginfo __user *, int, + struct compat_rusage __user *); + + + +/* kernel/futex.c */ +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len); +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, + compat_size_t __user *len_ptr); + +/* kernel/hrtimer.c */ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); + +/* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, struct compat_itimerval __user 
*it); asmlinkage long compat_sys_setitimer(int which, struct compat_itimerval __user *in, struct compat_itimerval __user *out); -asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); -asmlinkage long compat_sys_setrlimit(unsigned int resource, - struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrlimit(unsigned int resource, - struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); -asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, - unsigned int len, - compat_ulong_t __user *user_mask_ptr); -asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, - unsigned int len, - compat_ulong_t __user *user_mask_ptr); + +/* kernel/kexec.c */ +asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, + compat_ulong_t nr_segments, + struct compat_kexec_segment __user *, + compat_ulong_t flags); + +/* kernel/posix-timers.c */ asmlinkage long compat_sys_timer_create(clockid_t which_clock, struct compat_sigevent __user *timer_event_spec, timer_t __user *created_timer_id); +asmlinkage long compat_sys_timer_gettime(timer_t timer_id, + struct compat_itimerspec __user *setting); asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, struct compat_itimerspec __user *new, struct compat_itimerspec __user *old); -asmlinkage long compat_sys_timer_gettime(timer_t timer_id, - struct compat_itimerspec __user *setting); asmlinkage long compat_sys_clock_settime(clockid_t which_clock, struct compat_timespec __user *tp); asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, struct compat_timespec __user *tp); -asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct compat_timex __user *tp); asmlinkage long compat_sys_clock_getres(clockid_t which_clock, struct compat_timespec __user *tp); asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); 
-asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, - struct compat_siginfo __user *uinfo, - struct compat_timespec __user *uts, compat_size_t sigsetsize); + +/* kernel/ptrace.c */ +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data); + +/* kernel/sched/core.c */ +asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval); + +/* kernel/signal.c */ +asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, - compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, - compat_size_t sigsetsize); #ifndef CONFIG_ODD_RT_SIGACTION asmlinkage long compat_sys_rt_sigaction(int, const struct compat_sigaction __user *, struct compat_sigaction __user *, compat_size_t); #endif +asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, + struct compat_siginfo __user *uinfo, + struct compat_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); +/* No generic prototype for rt_sigreturn */ + +/* kernel/sys.c */ +asmlinkage long 
compat_sys_times(struct compat_tms __user *tbuf); +asmlinkage long compat_sys_getrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_setrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); + +/* kernel/time.c */ +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); + +/* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, - struct compat_timespec __user *utime, u32 __user *uaddr2, - u32 val3); -asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen); -asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, - compat_ulong_t nr_segments, - struct compat_kexec_segment __user *, - compat_ulong_t flags); -asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, - const struct compat_mq_attr __user *u_mqstat, - struct compat_mq_attr __user *u_omqstat); -asmlinkage long compat_sys_mq_notify(mqd_t mqdes, - const struct compat_sigevent __user *u_notification); + +/* ipc/mqueue.c */ asmlinkage long compat_sys_mq_open(const char __user *u_name, int oflag, compat_mode_t mode, struct compat_mq_attr __user *u_attr); @@ -807,17 +766,92 @@ asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, compat_size_t msg_len, unsigned int __user *u_msg_prio, const struct compat_timespec __user *u_abs_timeout); -asmlinkage long compat_sys_socketcall(int call, u32 __user *args); -asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user 
*args); +asmlinkage long compat_sys_mq_notify(mqd_t mqdes, + const struct compat_sigevent __user *u_notification); +asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, + const struct compat_mq_attr __user *u_mqstat, + struct compat_mq_attr __user *u_omqstat); -extern ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, - unsigned long nr_segs, - unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer); +/* ipc/msg.c */ +asmlinkage long compat_sys_msgctl(int first, int second, void __user *uptr); +asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); +asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, int msgflg); -extern void __user *compat_alloc_user_space(unsigned long len); +/* ipc/sem.c */ +asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); +asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, + unsigned nsems, const struct compat_timespec __user *timeout); + +/* ipc/shm.c */ +asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr); +asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); + +/* net/socket.c */ +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen); +asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, unsigned int optlen); +asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, + unsigned flags); +asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, + unsigned int flags); + +/* mm/filemap.c: No generic prototype for readahead */ +/* security/keys/keyctl.c */ +asmlinkage long 
compat_sys_keyctl(u32 option, + u32 arg2, u32 arg3, u32 arg4, u32 arg5); + +/* arch/example/kernel/sys_example.c */ +asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp); + +/* mm/fadvise.c: No generic prototype for fadvise64_64 */ + +/* mm/, CONFIG_MMU only */ +asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, + compat_ulong_t mode, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, compat_ulong_t flags); +asmlinkage long compat_sys_get_mempolicy(int __user *policy, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, + compat_ulong_t addr, + compat_ulong_t flags); +asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, + compat_ulong_t maxnode); +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes); +asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, + __u32 __user *pages, + const int __user *nodes, + int __user *status, + int flags); + +asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, + compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_wait4(compat_pid_t pid, + compat_uint_t __user *stat_addr, int options, + struct compat_rusage __user *ru); +asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, + int, const char __user *); +asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, + int flags); +asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, + struct compat_timex __user *tp); +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags); 
asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, const struct compat_iovec __user *lvec, compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, @@ -826,14 +860,89 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, const struct compat_iovec __user *lvec, compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, compat_ulong_t riovcnt, compat_ulong_t flags); +asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, int flags); +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +asmlinkage long compat_sys_readv64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, compat_size_t count); -asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, compat_size_t count); -asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, - compat_stack_t __user *uoss_ptr); +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +asmlinkage long compat_sys_pwritev64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + + +/* + * Architecture-specific system calls + */ + +/* fs/quota/compat.c -- x86 only */ +asmlinkage long compat_sys_quotactl32(unsigned int cmd, + const char __user *special, qid_t id, void __user *addr); + +/* arch_prctl -- x86 */ +asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); + + +/* + * Deprecated system calls which are still defined in 
+ * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch + */ + +/* __ARCH_WANT_SYSCALL_NO_AT */ +asmlinkage long compat_sys_open(const char __user *filename, int flags, + umode_t mode); +asmlinkage long compat_sys_utimes(const char __user *filename, + struct compat_timeval __user *t); + +/* __ARCH_WANT_SYSCALL_NO_FLAGS */ +asmlinkage long compat_sys_signalfd(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); + +/* __ARCH_WANT_SYSCALL_OFF_T */ +asmlinkage long compat_sys_newstat(const char __user *filename, + struct compat_stat __user *statbuf); +asmlinkage long compat_sys_newlstat(const char __user *filename, + struct compat_stat __user *statbuf); + +/* __ARCH_WANT_SYSCALL_DEPRECATED */ +asmlinkage long compat_sys_time(compat_time_t __user *tloc); +asmlinkage long compat_sys_utime(const char __user *filename, + struct compat_utimbuf __user *t); +asmlinkage long compat_sys_futimesat(unsigned int dfd, + const char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timeval __user *tvp); +asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); +asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, + unsigned flags); +asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); + +/* obsolete: fs/readdir.c */ +asmlinkage long compat_sys_old_readdir(unsigned int fd, + struct compat_old_linux_dirent __user *, + unsigned int count); + +/* obsolete: fs/select.c */ +asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); + +/* obsolete: ipc */ +asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); +/* obsolete: kernel/signal.c */ #ifdef __ARCH_WANT_SYS_SIGPENDING asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); #endif @@ -842,26 +951,18 @@ asmlinkage long 
compat_sys_sigpending(compat_old_sigset_t __user *set); asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, compat_old_sigset_t __user *oset); #endif +#ifdef CONFIG_COMPAT_OLD_SIGACTION +asmlinkage long compat_sys_sigaction(int sig, + const struct compat_old_sigaction __user *act, + struct compat_old_sigaction __user *oact); +#endif -int compat_restore_altstack(const compat_stack_t __user *uss); -int __compat_save_altstack(compat_stack_t __user *, unsigned long); -#define compat_save_altstack_ex(uss, sp) do { \ - compat_stack_t __user *__uss = uss; \ - struct task_struct *t = current; \ - put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ - put_user_ex(t->sas_ss_size, &__uss->ss_size); \ - if (t->sas_ss_flags & SS_AUTODISARM) \ - sas_ss_reset(t); \ -} while (0); - -asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); +/* obsolete: kernel/time/time.c */ +asmlinkage long compat_sys_stime(compat_time_t __user *tptr); -asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, - int, const char __user *); +/* obsolete: net/socket.c */ +asmlinkage long compat_sys_socketcall(int call, u32 __user *args); -asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); /* * For most but not all architectures, "am I in a compat syscall?" and -- cgit v1.2.3 From 3e2052e5dd4062ccc7a10e8860aa7d2e58627001 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 14:09:17 +0100 Subject: syscalls/x86: auto-create compat_sys_*() prototypes compat_sys_*() functions are no longer called from within the kernel on x86 except from the system call table. Linking the system call does not require compat_sys_*() function prototypes at least on x86. 
Therefore, generate compat_sys_*() prototypes on-the-fly within the COMPAT_SYSCALL_DEFINEx() macro, and remove x86-specific prototypes from various header files. Suggested-by: Andy Lutomirski Cc: Arnd Bergmann Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: Thomas Gleixner Cc: Andi Kleen Cc: Ingo Molnar Cc: Andrew Morton Cc: Al Viro Cc: x86@kernel.org Signed-off-by: Dominik Brodowski --- arch/x86/ia32/ia32_signal.c | 1 - arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/include/asm/sys_ia32.h | 64 ----------------------------------------- include/linux/compat.h | 17 ++--------- 4 files changed, 4 insertions(+), 80 deletions(-) delete mode 100644 arch/x86/include/asm/sys_ia32.h diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 41c671854642..86b1341cba9a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -33,7 +33,6 @@ #include #include #include -#include #include /* diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index bd8a7020b9a7..11ef7b7c9cc8 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #define AA(__x) ((unsigned long)(__x)) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index 2ee6e3b96656..000000000000 --- a/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. 
- */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#ifdef CONFIG_COMPAT - -#include -#include -#include -#include -#include -#include - -/* ia32/sys_ia32.c */ -asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, - unsigned long); -asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, - unsigned long); - -asmlinkage long compat_sys_x86_stat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_lstat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct32; -asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); - -asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, - u32); -asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, - u32, u32); - -asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, - int); - -asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, - size_t); -asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, - unsigned int, unsigned int, - int); -asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, - size_t, int); -asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, - unsigned int, unsigned int); -asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, - unsigned long, int __user *); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigreturn(void); -asmlinkage long sys32_rt_sigreturn(void); - -#endif /* CONFIG_COMPAT */ - -#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/include/linux/compat.h b/include/linux/compat.h index f881cce627f6..8cb8710db0ab 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -49,6 +49,7 @@ COMPAT_SYSCALL_DEFINEx(6, 
_##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ __attribute__((alias(__stringify(compat_SyS##name)))); \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -507,8 +508,8 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); /* * These syscall function prototypes are kept in the same order as - * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, - * followed by deprecated or obsolete system calls. + * include/uapi/asm-generic/unistd.h. Deprecated or obsolete system calls + * go below. * * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. @@ -882,18 +883,6 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, #endif -/* - * Architecture-specific system calls - */ - -/* fs/quota/compat.c -- x86 only */ -asmlinkage long compat_sys_quotactl32(unsigned int cmd, - const char __user *special, qid_t id, void __user *addr); - -/* arch_prctl -- x86 */ -asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); - - /* * Deprecated system calls which are still defined in * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch -- cgit v1.2.3 From 70dd4b3160798b647b7f30baf9fb6e8a5933d4e2 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 19:53:01 +0100 Subject: kernel/sys_ni: sort cond_syscall() entries Shuffle the cond_syscall() entries in kernel/sys_ni.c around so that they are kept in the same order as in include/uapi/asm-generic/unistd.h. For better structuring, add the same comments as in that file, but keep a few additional comments and extend the commentary where it seems useful. 
Signed-off-by: Dominik Brodowski --- kernel/sys_ni.c | 506 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 332 insertions(+), 174 deletions(-) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 951dbda5c2b4..0c1538f5a559 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -17,245 +17,403 @@ asmlinkage long sys_ni_syscall(void) return -ENOSYS; } -cond_syscall(sys_quotactl); -cond_syscall(compat_sys_quotactl32); -cond_syscall(sys_acct); +/* + * This list is kept in the same order as include/uapi/asm-generic/unistd.h. + * Architecture specific entries go below, followed by deprecated or obsolete + * system calls. + */ + +cond_syscall(sys_io_setup); +cond_syscall(compat_sys_io_setup); +cond_syscall(sys_io_destroy); +cond_syscall(sys_io_submit); +cond_syscall(compat_sys_io_submit); +cond_syscall(sys_io_cancel); +cond_syscall(sys_io_getevents); +cond_syscall(compat_sys_io_getevents); + +/* fs/xattr.c */ + +/* fs/dcache.c */ + +/* fs/cookies.c */ cond_syscall(sys_lookup_dcookie); cond_syscall(compat_sys_lookup_dcookie); -cond_syscall(sys_swapon); -cond_syscall(sys_swapoff); + +/* fs/eventfd.c */ +cond_syscall(sys_eventfd2); + +/* fs/eventpoll.c */ +cond_syscall(sys_epoll_create1); +cond_syscall(sys_epoll_ctl); +cond_syscall(sys_epoll_pwait); +cond_syscall(compat_sys_epoll_pwait); + +/* fs/fcntl.c */ + +/* fs/inotify_user.c */ +cond_syscall(sys_inotify_init1); +cond_syscall(sys_inotify_add_watch); +cond_syscall(sys_inotify_rm_watch); + +/* fs/ioctl.c */ + +/* fs/ioprio.c */ +cond_syscall(sys_ioprio_set); +cond_syscall(sys_ioprio_get); + +/* fs/locks.c */ +cond_syscall(sys_flock); + +/* fs/namei.c */ + +/* fs/namespace.c */ + +/* fs/nfsctl.c */ + +/* fs/open.c */ + +/* fs/pipe.c */ + +/* fs/quota.c */ +cond_syscall(sys_quotactl); + +/* fs/readdir.c */ + +/* fs/read_write.c */ + +/* fs/sendfile.c */ + +/* fs/select.c */ + +/* fs/signalfd.c */ +cond_syscall(sys_signalfd4); +cond_syscall(compat_sys_signalfd4); + +/* fs/splice.c */ + +/* fs/stat.c */
+ +/* fs/sync.c */ + +/* fs/timerfd.c */ +cond_syscall(sys_timerfd_create); +cond_syscall(sys_timerfd_settime); +cond_syscall(compat_sys_timerfd_settime); +cond_syscall(sys_timerfd_gettime); +cond_syscall(compat_sys_timerfd_gettime); + +/* fs/utimes.c */ + +/* kernel/acct.c */ +cond_syscall(sys_acct); + +/* kernel/capability.c */ +cond_syscall(sys_capget); +cond_syscall(sys_capset); + +/* kernel/exec_domain.c */ + +/* kernel/exit.c */ + +/* kernel/fork.c */ + +/* kernel/futex.c */ +cond_syscall(sys_futex); +cond_syscall(compat_sys_futex); +cond_syscall(sys_set_robust_list); +cond_syscall(compat_sys_set_robust_list); +cond_syscall(sys_get_robust_list); +cond_syscall(compat_sys_get_robust_list); + +/* kernel/hrtimer.c */ + +/* kernel/itimer.c */ + +/* kernel/kexec.c */ cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); -cond_syscall(sys_kexec_file_load); + +/* kernel/module.c */ cond_syscall(sys_init_module); -cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); + +/* kernel/posix-timers.c */ + +/* kernel/printk.c */ +cond_syscall(sys_syslog); + +/* kernel/ptrace.c */ + +/* kernel/sched/core.c */ + +/* kernel/signal.c */ + +/* kernel/sys.c */ +cond_syscall(sys_setregid); +cond_syscall(sys_setgid); +cond_syscall(sys_setreuid); +cond_syscall(sys_setuid); +cond_syscall(sys_setresuid); +cond_syscall(sys_getresuid); +cond_syscall(sys_setresgid); +cond_syscall(sys_getresgid); +cond_syscall(sys_setfsuid); +cond_syscall(sys_setfsgid); +cond_syscall(sys_setgroups); +cond_syscall(sys_getgroups); + +/* kernel/time.c */ + +/* kernel/timer.c */ + +/* ipc/mqueue.c */ +cond_syscall(sys_mq_open); +cond_syscall(compat_sys_mq_open); +cond_syscall(sys_mq_unlink); +cond_syscall(sys_mq_timedsend); +cond_syscall(compat_sys_mq_timedsend); +cond_syscall(sys_mq_timedreceive); +cond_syscall(compat_sys_mq_timedreceive); +cond_syscall(sys_mq_notify); +cond_syscall(compat_sys_mq_notify); +cond_syscall(sys_mq_getsetattr); +cond_syscall(compat_sys_mq_getsetattr); + +/* 
ipc/msg.c */ +cond_syscall(sys_msgget); +cond_syscall(sys_msgctl); +cond_syscall(compat_sys_msgctl); +cond_syscall(sys_msgrcv); +cond_syscall(compat_sys_msgrcv); +cond_syscall(sys_msgsnd); +cond_syscall(compat_sys_msgsnd); + +/* ipc/sem.c */ +cond_syscall(sys_semget); +cond_syscall(sys_semctl); +cond_syscall(compat_sys_semctl); +cond_syscall(sys_semtimedop); +cond_syscall(compat_sys_semtimedop); +cond_syscall(sys_semop); + +/* ipc/shm.c */ +cond_syscall(sys_shmget); +cond_syscall(sys_shmctl); +cond_syscall(compat_sys_shmctl); +cond_syscall(sys_shmat); +cond_syscall(compat_sys_shmat); +cond_syscall(sys_shmdt); + +/* net/socket.c */ +cond_syscall(sys_socket); cond_syscall(sys_socketpair); cond_syscall(sys_bind); cond_syscall(sys_listen); cond_syscall(sys_accept); -cond_syscall(sys_accept4); cond_syscall(sys_connect); cond_syscall(sys_getsockname); cond_syscall(sys_getpeername); -cond_syscall(sys_sendto); -cond_syscall(sys_send); -cond_syscall(sys_recvfrom); -cond_syscall(sys_recv); -cond_syscall(sys_socket); cond_syscall(sys_setsockopt); cond_syscall(compat_sys_setsockopt); cond_syscall(sys_getsockopt); cond_syscall(compat_sys_getsockopt); +cond_syscall(sys_sendto); cond_syscall(sys_shutdown); +cond_syscall(sys_recvfrom); +cond_syscall(compat_sys_recvfrom); cond_syscall(sys_sendmsg); -cond_syscall(sys_sendmmsg); cond_syscall(compat_sys_sendmsg); -cond_syscall(compat_sys_sendmmsg); cond_syscall(sys_recvmsg); -cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); -cond_syscall(compat_sys_recv); -cond_syscall(compat_sys_recvfrom); -cond_syscall(compat_sys_recvmmsg); -cond_syscall(sys_socketcall); -cond_syscall(sys_futex); -cond_syscall(compat_sys_futex); -cond_syscall(sys_set_robust_list); -cond_syscall(compat_sys_set_robust_list); -cond_syscall(sys_get_robust_list); -cond_syscall(compat_sys_get_robust_list); -cond_syscall(sys_epoll_create); -cond_syscall(sys_epoll_create1); -cond_syscall(sys_epoll_ctl); -cond_syscall(sys_epoll_wait); 
-cond_syscall(sys_epoll_pwait); -cond_syscall(compat_sys_epoll_pwait); -cond_syscall(sys_semget); -cond_syscall(sys_semop); -cond_syscall(sys_semtimedop); -cond_syscall(compat_sys_semtimedop); -cond_syscall(sys_semctl); -cond_syscall(compat_sys_semctl); -cond_syscall(sys_msgget); -cond_syscall(sys_msgsnd); -cond_syscall(compat_sys_msgsnd); -cond_syscall(sys_msgrcv); -cond_syscall(compat_sys_msgrcv); -cond_syscall(sys_msgctl); -cond_syscall(compat_sys_msgctl); -cond_syscall(sys_shmget); -cond_syscall(sys_shmat); -cond_syscall(compat_sys_shmat); -cond_syscall(sys_shmdt); -cond_syscall(sys_shmctl); -cond_syscall(compat_sys_shmctl); -cond_syscall(sys_mq_open); -cond_syscall(sys_mq_unlink); -cond_syscall(sys_mq_timedsend); -cond_syscall(sys_mq_timedreceive); -cond_syscall(sys_mq_notify); -cond_syscall(sys_mq_getsetattr); -cond_syscall(compat_sys_mq_open); -cond_syscall(compat_sys_mq_timedsend); -cond_syscall(compat_sys_mq_timedreceive); -cond_syscall(compat_sys_mq_notify); -cond_syscall(compat_sys_mq_getsetattr); -cond_syscall(sys_mbind); -cond_syscall(sys_get_mempolicy); -cond_syscall(sys_set_mempolicy); -cond_syscall(compat_sys_mbind); -cond_syscall(compat_sys_get_mempolicy); -cond_syscall(compat_sys_set_mempolicy); + +/* mm/filemap.c */ + +/* mm/nommu.c, also with MMU */ +cond_syscall(sys_mremap); + +/* security/keys/keyctl.c */ cond_syscall(sys_add_key); cond_syscall(sys_request_key); cond_syscall(sys_keyctl); cond_syscall(compat_sys_keyctl); -cond_syscall(compat_sys_socketcall); -cond_syscall(sys_inotify_init); -cond_syscall(sys_inotify_init1); -cond_syscall(sys_inotify_add_watch); -cond_syscall(sys_inotify_rm_watch); -cond_syscall(sys_migrate_pages); -cond_syscall(sys_move_pages); -cond_syscall(sys_chown16); -cond_syscall(sys_fchown16); -cond_syscall(sys_getegid16); -cond_syscall(sys_geteuid16); -cond_syscall(sys_getgid16); -cond_syscall(sys_getgroups16); -cond_syscall(sys_getresgid16); -cond_syscall(sys_getresuid16); -cond_syscall(sys_getuid16); 
-cond_syscall(sys_lchown16); -cond_syscall(sys_setfsgid16); -cond_syscall(sys_setfsuid16); -cond_syscall(sys_setgid16); -cond_syscall(sys_setgroups16); -cond_syscall(sys_setregid16); -cond_syscall(sys_setresgid16); -cond_syscall(sys_setresuid16); -cond_syscall(sys_setreuid16); -cond_syscall(sys_setuid16); -cond_syscall(sys_sgetmask); -cond_syscall(sys_ssetmask); -cond_syscall(sys_vm86old); -cond_syscall(sys_vm86); -cond_syscall(sys_modify_ldt); -cond_syscall(sys_ipc); -cond_syscall(compat_sys_ipc); -cond_syscall(compat_sys_sysctl); -cond_syscall(sys_flock); -cond_syscall(sys_io_setup); -cond_syscall(sys_io_destroy); -cond_syscall(sys_io_submit); -cond_syscall(sys_io_cancel); -cond_syscall(sys_io_getevents); -cond_syscall(compat_sys_io_setup); -cond_syscall(compat_sys_io_submit); -cond_syscall(compat_sys_io_getevents); -cond_syscall(sys_sysfs); -cond_syscall(sys_syslog); -cond_syscall(sys_process_vm_readv); -cond_syscall(sys_process_vm_writev); -cond_syscall(compat_sys_process_vm_readv); -cond_syscall(compat_sys_process_vm_writev); -cond_syscall(sys_uselib); -cond_syscall(sys_fadvise64); -cond_syscall(sys_fadvise64_64); -cond_syscall(sys_madvise); -cond_syscall(sys_setuid); -cond_syscall(sys_setregid); -cond_syscall(sys_setgid); -cond_syscall(sys_setreuid); -cond_syscall(sys_setresuid); -cond_syscall(sys_getresuid); -cond_syscall(sys_setresgid); -cond_syscall(sys_getresgid); -cond_syscall(sys_setgroups); -cond_syscall(sys_getgroups); -cond_syscall(sys_setfsuid); -cond_syscall(sys_setfsgid); -cond_syscall(sys_capget); -cond_syscall(sys_capset); -cond_syscall(sys_copy_file_range); -/* arch-specific weak syscall entries */ -cond_syscall(sys_pciconfig_read); -cond_syscall(sys_pciconfig_write); -cond_syscall(sys_pciconfig_iobase); -cond_syscall(compat_sys_s390_ipc); -cond_syscall(ppc_rtas); -cond_syscall(sys_spu_run); -cond_syscall(sys_spu_create); -cond_syscall(sys_subpage_prot); -cond_syscall(sys_s390_pci_mmio_read); -cond_syscall(sys_s390_pci_mmio_write); +/* 
arch/example/kernel/sys_example.c */ -/* mmu depending weak syscall entries */ +/* mm/fadvise.c */ +cond_syscall(sys_fadvise64_64); + +/* mm/, CONFIG_MMU only */ +cond_syscall(sys_swapon); +cond_syscall(sys_swapoff); cond_syscall(sys_mprotect); cond_syscall(sys_msync); cond_syscall(sys_mlock); cond_syscall(sys_munlock); cond_syscall(sys_mlockall); cond_syscall(sys_munlockall); -cond_syscall(sys_mlock2); cond_syscall(sys_mincore); cond_syscall(sys_madvise); -cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); -cond_syscall(compat_sys_move_pages); +cond_syscall(sys_mbind); +cond_syscall(compat_sys_mbind); +cond_syscall(sys_get_mempolicy); +cond_syscall(compat_sys_get_mempolicy); +cond_syscall(sys_set_mempolicy); +cond_syscall(compat_sys_set_mempolicy); +cond_syscall(sys_migrate_pages); cond_syscall(compat_sys_migrate_pages); +cond_syscall(sys_move_pages); +cond_syscall(compat_sys_move_pages); -/* block-layer dependent */ -cond_syscall(sys_bdflush); -cond_syscall(sys_ioprio_set); -cond_syscall(sys_ioprio_get); - -/* New file descriptors */ -cond_syscall(sys_signalfd); -cond_syscall(sys_signalfd4); -cond_syscall(compat_sys_signalfd); -cond_syscall(compat_sys_signalfd4); -cond_syscall(sys_timerfd_create); -cond_syscall(sys_timerfd_settime); -cond_syscall(sys_timerfd_gettime); -cond_syscall(compat_sys_timerfd_settime); -cond_syscall(compat_sys_timerfd_gettime); -cond_syscall(sys_eventfd); -cond_syscall(sys_eventfd2); -cond_syscall(sys_memfd_create); -cond_syscall(sys_userfaultfd); - -/* performance counters: */ cond_syscall(sys_perf_event_open); +cond_syscall(sys_accept4); +cond_syscall(sys_recvmmsg); +cond_syscall(compat_sys_recvmmsg); + +/* + * Architecture specific syscalls: see further below + */ -/* fanotify! 
*/ +/* fanotify */ cond_syscall(sys_fanotify_init); cond_syscall(sys_fanotify_mark); -cond_syscall(compat_sys_fanotify_mark); /* open by handle */ cond_syscall(sys_name_to_handle_at); cond_syscall(sys_open_by_handle_at); cond_syscall(compat_sys_open_by_handle_at); +cond_syscall(sys_sendmmsg); +cond_syscall(compat_sys_sendmmsg); +cond_syscall(sys_process_vm_readv); +cond_syscall(compat_sys_process_vm_readv); +cond_syscall(sys_process_vm_writev); +cond_syscall(compat_sys_process_vm_writev); + /* compare kernel pointers */ cond_syscall(sys_kcmp); +cond_syscall(sys_finit_module); + /* operate on Secure Computing state */ cond_syscall(sys_seccomp); +cond_syscall(sys_memfd_create); + /* access BPF programs and maps */ cond_syscall(sys_bpf); /* execveat */ cond_syscall(sys_execveat); +cond_syscall(sys_userfaultfd); + /* membarrier */ cond_syscall(sys_membarrier); +cond_syscall(sys_mlock2); + +cond_syscall(sys_copy_file_range); + /* memory protection keys */ cond_syscall(sys_pkey_mprotect); cond_syscall(sys_pkey_alloc); cond_syscall(sys_pkey_free); + + +/* + * Architecture specific weak syscall entries. + */ + +/* pciconfig: alpha, arm, arm64, ia64, sparc */ +cond_syscall(sys_pciconfig_read); +cond_syscall(sys_pciconfig_write); +cond_syscall(sys_pciconfig_iobase); + +/* sys_socketcall: arm, mips, x86, ... */ +cond_syscall(sys_socketcall); +cond_syscall(compat_sys_socketcall); + +/* compat syscalls for arm64, x86, ... 
*/ +cond_syscall(compat_sys_sysctl); +cond_syscall(compat_sys_fanotify_mark); + +/* x86 */ +cond_syscall(sys_vm86old); +cond_syscall(sys_modify_ldt); +cond_syscall(compat_sys_quotactl32); +cond_syscall(sys_vm86); +cond_syscall(sys_kexec_file_load); + +/* s390 */ +cond_syscall(sys_s390_pci_mmio_read); +cond_syscall(sys_s390_pci_mmio_write); +cond_syscall(compat_sys_s390_ipc); + +/* powerpc */ +cond_syscall(ppc_rtas); +cond_syscall(sys_spu_run); +cond_syscall(sys_spu_create); +cond_syscall(sys_subpage_prot); + + +/* + * Deprecated system calls which are still defined in + * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch + */ + +/* __ARCH_WANT_SYSCALL_NO_FLAGS */ +cond_syscall(sys_epoll_create); +cond_syscall(sys_inotify_init); +cond_syscall(sys_eventfd); +cond_syscall(sys_signalfd); +cond_syscall(compat_sys_signalfd); + +/* __ARCH_WANT_SYSCALL_OFF_T */ +cond_syscall(sys_fadvise64); + +/* __ARCH_WANT_SYSCALL_DEPRECATED */ +cond_syscall(sys_epoll_wait); +cond_syscall(sys_recv); +cond_syscall(compat_sys_recv); +cond_syscall(sys_send); +cond_syscall(sys_bdflush); +cond_syscall(sys_uselib); + + +/* + * The syscalls below are not found in include/uapi/asm-generic/unistd.h + */ + +/* obsolete: SGETMASK_SYSCALL */ +cond_syscall(sys_sgetmask); +cond_syscall(sys_ssetmask); + +/* obsolete: SYSFS_SYSCALL */ +cond_syscall(sys_sysfs); + +/* obsolete: __ARCH_WANT_SYS_IPC */ +cond_syscall(sys_ipc); +cond_syscall(compat_sys_ipc); + +/* obsolete: UID16 */ +cond_syscall(sys_chown16); +cond_syscall(sys_fchown16); +cond_syscall(sys_getegid16); +cond_syscall(sys_geteuid16); +cond_syscall(sys_getgid16); +cond_syscall(sys_getgroups16); +cond_syscall(sys_getresgid16); +cond_syscall(sys_getresuid16); +cond_syscall(sys_getuid16); +cond_syscall(sys_lchown16); +cond_syscall(sys_setfsgid16); +cond_syscall(sys_setfsuid16); +cond_syscall(sys_setgid16); +cond_syscall(sys_setgroups16); +cond_syscall(sys_setregid16); +cond_syscall(sys_setresgid16); +cond_syscall(sys_setresuid16); 
+cond_syscall(sys_setreuid16); +cond_syscall(sys_setuid16); -- cgit v1.2.3 From 67a7acd3773a94df2e671601a288685485463cf9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 4 Mar 2018 19:06:35 +0100 Subject: kernel/sys_ni: remove {sys_,compat_sys_} from cond_syscall definitions This keeps it in line with the SYSCALL_DEFINEx() / COMPAT_SYSCALL_DEFINEx() calling convention. Signed-off-by: Dominik Brodowski --- Documentation/process/adding-syscalls.rst | 2 +- kernel/sys_ni.c | 433 +++++++++++++++--------------- 2 files changed, 219 insertions(+), 216 deletions(-) diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index 556613744556..314c8bf6f2a2 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -222,7 +222,7 @@ your new syscall number may get adjusted to resolve conflicts. The file ``kernel/sys_ni.c`` provides a fallback stub implementation of each system call, returning ``-ENOSYS``. Add your new system call here too:: - cond_syscall(sys_xyzzy); + COND_SYSCALL(xyzzy); Your new kernel functionality, and the system call that controls it, should normally be optional, so add a ``CONFIG`` option (typically to diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 0c1538f5a559..6cafc008f6db 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -17,53 +17,56 @@ asmlinkage long sys_ni_syscall(void) return -ENOSYS; } +#define COND_SYSCALL(name) cond_syscall(sys_##name) +#define COND_SYSCALL_COMPAT(name) cond_syscall(compat_sys_##name) + /* * This list is kept in the same order as include/uapi/asm-generic/unistd.h. * Architecture specific entries go below, followed by deprecated or obsolete * system calls.
*/ -cond_syscall(sys_io_setup); -cond_syscall(compat_sys_io_setup); -cond_syscall(sys_io_destroy); -cond_syscall(sys_io_submit); -cond_syscall(compat_sys_io_submit); -cond_syscall(sys_io_cancel); -cond_syscall(sys_io_getevents); -cond_syscall(compat_sys_io_getevents); +COND_SYSCALL(io_setup); +COND_SYSCALL_COMPAT(io_setup); +COND_SYSCALL(io_destroy); +COND_SYSCALL(io_submit); +COND_SYSCALL_COMPAT(io_submit); +COND_SYSCALL(io_cancel); +COND_SYSCALL(io_getevents); +COND_SYSCALL_COMPAT(io_getevents); /* fs/xattr.c */ /* fs/dcache.c */ /* fs/cookies.c */ -cond_syscall(sys_lookup_dcookie); -cond_syscall(compat_sys_lookup_dcookie); +COND_SYSCALL(lookup_dcookie); +COND_SYSCALL_COMPAT(lookup_dcookie); /* fs/eventfd.c */ -cond_syscall(sys_eventfd2); +COND_SYSCALL(eventfd2); /* fs/eventfd.c */ -cond_syscall(sys_epoll_create1); -cond_syscall(sys_epoll_ctl); -cond_syscall(sys_epoll_pwait); -cond_syscall(compat_sys_epoll_pwait); +COND_SYSCALL(epoll_create1); +COND_SYSCALL(epoll_ctl); +COND_SYSCALL(epoll_pwait); +COND_SYSCALL_COMPAT(epoll_pwait); /* fs/fcntl.c */ /* fs/inotify_user.c */ -cond_syscall(sys_inotify_init1); -cond_syscall(sys_inotify_add_watch); -cond_syscall(sys_inotify_rm_watch); +COND_SYSCALL(inotify_init1); +COND_SYSCALL(inotify_add_watch); +COND_SYSCALL(inotify_rm_watch); /* fs/ioctl.c */ /* fs/ioprio.c */ -cond_syscall(sys_ioprio_set); -cond_syscall(sys_ioprio_get); +COND_SYSCALL(ioprio_set); +COND_SYSCALL(ioprio_get); /* fs/locks.c */ -cond_syscall(sys_flock); +COND_SYSCALL(flock); /* fs/namei.c */ @@ -76,7 +79,7 @@ cond_syscall(sys_flock); /* fs/pipe.c */ /* fs/quota.c */ -cond_syscall(sys_quotactl); +COND_SYSCALL(quotactl); /* fs/readdir.c */ @@ -87,8 +90,8 @@ cond_syscall(sys_quotactl); /* fs/select.c */ /* fs/signalfd.c */ -cond_syscall(sys_signalfd4); -cond_syscall(compat_sys_signalfd4); +COND_SYSCALL(signalfd4); +COND_SYSCALL_COMPAT(signalfd4); /* fs/splice.c */ @@ -97,20 +100,20 @@ cond_syscall(compat_sys_signalfd4); /* fs/sync.c */ /* fs/timerfd.c */ 
-cond_syscall(sys_timerfd_create); -cond_syscall(sys_timerfd_settime); -cond_syscall(compat_sys_timerfd_settime); -cond_syscall(sys_timerfd_gettime); -cond_syscall(compat_sys_timerfd_gettime); +COND_SYSCALL(timerfd_create); +COND_SYSCALL(timerfd_settime); +COND_SYSCALL_COMPAT(timerfd_settime); +COND_SYSCALL(timerfd_gettime); +COND_SYSCALL_COMPAT(timerfd_gettime); /* fs/utimes.c */ /* kernel/acct.c */ -cond_syscall(sys_acct); +COND_SYSCALL(acct); /* kernel/capability.c */ -cond_syscall(sys_capget); -cond_syscall(sys_capset); +COND_SYSCALL(capget); +COND_SYSCALL(capset); /* kernel/exec_domain.c */ @@ -119,29 +122,29 @@ cond_syscall(sys_capset); /* kernel/fork.c */ /* kernel/futex.c */ -cond_syscall(sys_futex); -cond_syscall(compat_sys_futex); -cond_syscall(sys_set_robust_list); -cond_syscall(compat_sys_set_robust_list); -cond_syscall(sys_get_robust_list); -cond_syscall(compat_sys_get_robust_list); +COND_SYSCALL(futex); +COND_SYSCALL_COMPAT(futex); +COND_SYSCALL(set_robust_list); +COND_SYSCALL_COMPAT(set_robust_list); +COND_SYSCALL(get_robust_list); +COND_SYSCALL_COMPAT(get_robust_list); /* kernel/hrtimer.c */ /* kernel/itimer.c */ /* kernel/kexec.c */ -cond_syscall(sys_kexec_load); -cond_syscall(compat_sys_kexec_load); +COND_SYSCALL(kexec_load); +COND_SYSCALL_COMPAT(kexec_load); /* kernel/module.c */ -cond_syscall(sys_init_module); -cond_syscall(sys_delete_module); +COND_SYSCALL(init_module); +COND_SYSCALL(delete_module); /* kernel/posix-timers.c */ /* kernel/printk.c */ -cond_syscall(sys_syslog); +COND_SYSCALL(syslog); /* kernel/ptrace.c */ @@ -150,176 +153,176 @@ cond_syscall(sys_syslog); /* kernel/signal.c */ /* kernel/sys.c */ -cond_syscall(sys_setregid); -cond_syscall(sys_setgid); -cond_syscall(sys_setreuid); -cond_syscall(sys_setuid); -cond_syscall(sys_setresuid); -cond_syscall(sys_getresuid); -cond_syscall(sys_setresgid); -cond_syscall(sys_getresgid); -cond_syscall(sys_setfsuid); -cond_syscall(sys_setfsgid); -cond_syscall(sys_setgroups); 
-cond_syscall(sys_getgroups); +COND_SYSCALL(setregid); +COND_SYSCALL(setgid); +COND_SYSCALL(setreuid); +COND_SYSCALL(setuid); +COND_SYSCALL(setresuid); +COND_SYSCALL(getresuid); +COND_SYSCALL(setresgid); +COND_SYSCALL(getresgid); +COND_SYSCALL(setfsuid); +COND_SYSCALL(setfsgid); +COND_SYSCALL(setgroups); +COND_SYSCALL(getgroups); /* kernel/time.c */ /* kernel/timer.c */ /* ipc/mqueue.c */ -cond_syscall(sys_mq_open); -cond_syscall(compat_sys_mq_open); -cond_syscall(sys_mq_unlink); -cond_syscall(sys_mq_timedsend); -cond_syscall(compat_sys_mq_timedsend); -cond_syscall(sys_mq_timedreceive); -cond_syscall(compat_sys_mq_timedreceive); -cond_syscall(sys_mq_notify); -cond_syscall(compat_sys_mq_notify); -cond_syscall(sys_mq_getsetattr); -cond_syscall(compat_sys_mq_getsetattr); +COND_SYSCALL(mq_open); +COND_SYSCALL_COMPAT(mq_open); +COND_SYSCALL(mq_unlink); +COND_SYSCALL(mq_timedsend); +COND_SYSCALL_COMPAT(mq_timedsend); +COND_SYSCALL(mq_timedreceive); +COND_SYSCALL_COMPAT(mq_timedreceive); +COND_SYSCALL(mq_notify); +COND_SYSCALL_COMPAT(mq_notify); +COND_SYSCALL(mq_getsetattr); +COND_SYSCALL_COMPAT(mq_getsetattr); /* ipc/msg.c */ -cond_syscall(sys_msgget); -cond_syscall(sys_msgctl); -cond_syscall(compat_sys_msgctl); -cond_syscall(sys_msgrcv); -cond_syscall(compat_sys_msgrcv); -cond_syscall(sys_msgsnd); -cond_syscall(compat_sys_msgsnd); +COND_SYSCALL(msgget); +COND_SYSCALL(msgctl); +COND_SYSCALL_COMPAT(msgctl); +COND_SYSCALL(msgrcv); +COND_SYSCALL_COMPAT(msgrcv); +COND_SYSCALL(msgsnd); +COND_SYSCALL_COMPAT(msgsnd); /* ipc/sem.c */ -cond_syscall(sys_semget); -cond_syscall(sys_semctl); -cond_syscall(compat_sys_semctl); -cond_syscall(sys_semtimedop); -cond_syscall(compat_sys_semtimedop); -cond_syscall(sys_semop); +COND_SYSCALL(semget); +COND_SYSCALL(semctl); +COND_SYSCALL_COMPAT(semctl); +COND_SYSCALL(semtimedop); +COND_SYSCALL_COMPAT(semtimedop); +COND_SYSCALL(semop); /* ipc/shm.c */ -cond_syscall(sys_shmget); -cond_syscall(sys_shmctl); -cond_syscall(compat_sys_shmctl); 
-cond_syscall(sys_shmat); -cond_syscall(compat_sys_shmat); -cond_syscall(sys_shmdt); +COND_SYSCALL(shmget); +COND_SYSCALL(shmctl); +COND_SYSCALL_COMPAT(shmctl); +COND_SYSCALL(shmat); +COND_SYSCALL_COMPAT(shmat); +COND_SYSCALL(shmdt); /* net/socket.c */ -cond_syscall(sys_socket); -cond_syscall(sys_socketpair); -cond_syscall(sys_bind); -cond_syscall(sys_listen); -cond_syscall(sys_accept); -cond_syscall(sys_connect); -cond_syscall(sys_getsockname); -cond_syscall(sys_getpeername); -cond_syscall(sys_setsockopt); -cond_syscall(compat_sys_setsockopt); -cond_syscall(sys_getsockopt); -cond_syscall(compat_sys_getsockopt); -cond_syscall(sys_sendto); -cond_syscall(sys_shutdown); -cond_syscall(sys_recvfrom); -cond_syscall(compat_sys_recvfrom); -cond_syscall(sys_sendmsg); -cond_syscall(compat_sys_sendmsg); -cond_syscall(sys_recvmsg); -cond_syscall(compat_sys_recvmsg); +COND_SYSCALL(socket); +COND_SYSCALL(socketpair); +COND_SYSCALL(bind); +COND_SYSCALL(listen); +COND_SYSCALL(accept); +COND_SYSCALL(connect); +COND_SYSCALL(getsockname); +COND_SYSCALL(getpeername); +COND_SYSCALL(setsockopt); +COND_SYSCALL_COMPAT(setsockopt); +COND_SYSCALL(getsockopt); +COND_SYSCALL_COMPAT(getsockopt); +COND_SYSCALL(sendto); +COND_SYSCALL(shutdown); +COND_SYSCALL(recvfrom); +COND_SYSCALL_COMPAT(recvfrom); +COND_SYSCALL(sendmsg); +COND_SYSCALL_COMPAT(sendmsg); +COND_SYSCALL(recvmsg); +COND_SYSCALL_COMPAT(recvmsg); /* mm/filemap.c */ /* mm/nommu.c, also with MMU */ -cond_syscall(sys_mremap); +COND_SYSCALL(mremap); /* security/keys/keyctl.c */ -cond_syscall(sys_add_key); -cond_syscall(sys_request_key); -cond_syscall(sys_keyctl); -cond_syscall(compat_sys_keyctl); +COND_SYSCALL(add_key); +COND_SYSCALL(request_key); +COND_SYSCALL(keyctl); +COND_SYSCALL_COMPAT(keyctl); /* arch/example/kernel/sys_example.c */ /* mm/fadvise.c */ -cond_syscall(sys_fadvise64_64); +COND_SYSCALL(fadvise64_64); /* mm/, CONFIG_MMU only */ -cond_syscall(sys_swapon); -cond_syscall(sys_swapoff); -cond_syscall(sys_mprotect); 
-cond_syscall(sys_msync); -cond_syscall(sys_mlock); -cond_syscall(sys_munlock); -cond_syscall(sys_mlockall); -cond_syscall(sys_munlockall); -cond_syscall(sys_mincore); -cond_syscall(sys_madvise); -cond_syscall(sys_remap_file_pages); -cond_syscall(sys_mbind); -cond_syscall(compat_sys_mbind); -cond_syscall(sys_get_mempolicy); -cond_syscall(compat_sys_get_mempolicy); -cond_syscall(sys_set_mempolicy); -cond_syscall(compat_sys_set_mempolicy); -cond_syscall(sys_migrate_pages); -cond_syscall(compat_sys_migrate_pages); -cond_syscall(sys_move_pages); -cond_syscall(compat_sys_move_pages); - -cond_syscall(sys_perf_event_open); -cond_syscall(sys_accept4); -cond_syscall(sys_recvmmsg); -cond_syscall(compat_sys_recvmmsg); +COND_SYSCALL(swapon); +COND_SYSCALL(swapoff); +COND_SYSCALL(mprotect); +COND_SYSCALL(msync); +COND_SYSCALL(mlock); +COND_SYSCALL(munlock); +COND_SYSCALL(mlockall); +COND_SYSCALL(munlockall); +COND_SYSCALL(mincore); +COND_SYSCALL(madvise); +COND_SYSCALL(remap_file_pages); +COND_SYSCALL(mbind); +COND_SYSCALL_COMPAT(mbind); +COND_SYSCALL(get_mempolicy); +COND_SYSCALL_COMPAT(get_mempolicy); +COND_SYSCALL(set_mempolicy); +COND_SYSCALL_COMPAT(set_mempolicy); +COND_SYSCALL(migrate_pages); +COND_SYSCALL_COMPAT(migrate_pages); +COND_SYSCALL(move_pages); +COND_SYSCALL_COMPAT(move_pages); + +COND_SYSCALL(perf_event_open); +COND_SYSCALL(accept4); +COND_SYSCALL(recvmmsg); +COND_SYSCALL_COMPAT(recvmmsg); /* * Architecture specific syscalls: see further below */ /* fanotify */ -cond_syscall(sys_fanotify_init); -cond_syscall(sys_fanotify_mark); +COND_SYSCALL(fanotify_init); +COND_SYSCALL(fanotify_mark); /* open by handle */ -cond_syscall(sys_name_to_handle_at); -cond_syscall(sys_open_by_handle_at); -cond_syscall(compat_sys_open_by_handle_at); +COND_SYSCALL(name_to_handle_at); +COND_SYSCALL(open_by_handle_at); +COND_SYSCALL_COMPAT(open_by_handle_at); -cond_syscall(sys_sendmmsg); -cond_syscall(compat_sys_sendmmsg); -cond_syscall(sys_process_vm_readv); 
-cond_syscall(compat_sys_process_vm_readv); -cond_syscall(sys_process_vm_writev); -cond_syscall(compat_sys_process_vm_writev); +COND_SYSCALL(sendmmsg); +COND_SYSCALL_COMPAT(sendmmsg); +COND_SYSCALL(process_vm_readv); +COND_SYSCALL_COMPAT(process_vm_readv); +COND_SYSCALL(process_vm_writev); +COND_SYSCALL_COMPAT(process_vm_writev); /* compare kernel pointers */ -cond_syscall(sys_kcmp); +COND_SYSCALL(kcmp); -cond_syscall(sys_finit_module); +COND_SYSCALL(finit_module); /* operate on Secure Computing state */ -cond_syscall(sys_seccomp); +COND_SYSCALL(seccomp); -cond_syscall(sys_memfd_create); +COND_SYSCALL(memfd_create); /* access BPF programs and maps */ -cond_syscall(sys_bpf); +COND_SYSCALL(bpf); /* execveat */ -cond_syscall(sys_execveat); +COND_SYSCALL(execveat); -cond_syscall(sys_userfaultfd); +COND_SYSCALL(userfaultfd); /* membarrier */ -cond_syscall(sys_membarrier); +COND_SYSCALL(membarrier); -cond_syscall(sys_mlock2); +COND_SYSCALL(mlock2); -cond_syscall(sys_copy_file_range); +COND_SYSCALL(copy_file_range); /* memory protection keys */ -cond_syscall(sys_pkey_mprotect); -cond_syscall(sys_pkey_alloc); -cond_syscall(sys_pkey_free); +COND_SYSCALL(pkey_mprotect); +COND_SYSCALL(pkey_alloc); +COND_SYSCALL(pkey_free); /* @@ -327,35 +330,35 @@ cond_syscall(sys_pkey_free); */ /* pciconfig: alpha, arm, arm64, ia64, sparc */ -cond_syscall(sys_pciconfig_read); -cond_syscall(sys_pciconfig_write); -cond_syscall(sys_pciconfig_iobase); +COND_SYSCALL(pciconfig_read); +COND_SYSCALL(pciconfig_write); +COND_SYSCALL(pciconfig_iobase); /* sys_socketcall: arm, mips, x86, ... */ -cond_syscall(sys_socketcall); -cond_syscall(compat_sys_socketcall); +COND_SYSCALL(socketcall); +COND_SYSCALL_COMPAT(socketcall); /* compat syscalls for arm64, x86, ... 
*/ -cond_syscall(compat_sys_sysctl); -cond_syscall(compat_sys_fanotify_mark); +COND_SYSCALL_COMPAT(sysctl); +COND_SYSCALL_COMPAT(fanotify_mark); /* x86 */ -cond_syscall(sys_vm86old); -cond_syscall(sys_modify_ldt); -cond_syscall(compat_sys_quotactl32); -cond_syscall(sys_vm86); -cond_syscall(sys_kexec_file_load); +COND_SYSCALL(vm86old); +COND_SYSCALL(modify_ldt); +COND_SYSCALL_COMPAT(quotactl32); +COND_SYSCALL(vm86); +COND_SYSCALL(kexec_file_load); /* s390 */ -cond_syscall(sys_s390_pci_mmio_read); -cond_syscall(sys_s390_pci_mmio_write); -cond_syscall(compat_sys_s390_ipc); +COND_SYSCALL(s390_pci_mmio_read); +COND_SYSCALL(s390_pci_mmio_write); +COND_SYSCALL_COMPAT(s390_ipc); /* powerpc */ cond_syscall(ppc_rtas); -cond_syscall(sys_spu_run); -cond_syscall(sys_spu_create); -cond_syscall(sys_subpage_prot); +COND_SYSCALL(spu_run); +COND_SYSCALL(spu_create); +COND_SYSCALL(subpage_prot); /* @@ -364,22 +367,22 @@ cond_syscall(sys_subpage_prot); */ /* __ARCH_WANT_SYSCALL_NO_FLAGS */ -cond_syscall(sys_epoll_create); -cond_syscall(sys_inotify_init); -cond_syscall(sys_eventfd); -cond_syscall(sys_signalfd); -cond_syscall(compat_sys_signalfd); +COND_SYSCALL(epoll_create); +COND_SYSCALL(inotify_init); +COND_SYSCALL(eventfd); +COND_SYSCALL(signalfd); +COND_SYSCALL_COMPAT(signalfd); /* __ARCH_WANT_SYSCALL_OFF_T */ -cond_syscall(sys_fadvise64); +COND_SYSCALL(fadvise64); /* __ARCH_WANT_SYSCALL_DEPRECATED */ -cond_syscall(sys_epoll_wait); -cond_syscall(sys_recv); -cond_syscall(compat_sys_recv); -cond_syscall(sys_send); -cond_syscall(sys_bdflush); -cond_syscall(sys_uselib); +COND_SYSCALL(epoll_wait); +COND_SYSCALL(recv); +COND_SYSCALL_COMPAT(recv); +COND_SYSCALL(send); +COND_SYSCALL(bdflush); +COND_SYSCALL(uselib); /* @@ -387,33 +390,33 @@ cond_syscall(sys_uselib); */ /* obsolete: SGETMASK_SYSCALL */ -cond_syscall(sys_sgetmask); -cond_syscall(sys_ssetmask); +COND_SYSCALL(sgetmask); +COND_SYSCALL(ssetmask); /* obsolete: SYSFS_SYSCALL */ -cond_syscall(sys_sysfs); +COND_SYSCALL(sysfs); /* 
obsolete: __ARCH_WANT_SYS_IPC */ -cond_syscall(sys_ipc); -cond_syscall(compat_sys_ipc); +COND_SYSCALL(ipc); +COND_SYSCALL_COMPAT(ipc); /* obsolete: UID16 */ -cond_syscall(sys_chown16); -cond_syscall(sys_fchown16); -cond_syscall(sys_getegid16); -cond_syscall(sys_geteuid16); -cond_syscall(sys_getgid16); -cond_syscall(sys_getgroups16); -cond_syscall(sys_getresgid16); -cond_syscall(sys_getresuid16); -cond_syscall(sys_getuid16); -cond_syscall(sys_lchown16); -cond_syscall(sys_setfsgid16); -cond_syscall(sys_setfsuid16); -cond_syscall(sys_setgid16); -cond_syscall(sys_setgroups16); -cond_syscall(sys_setregid16); -cond_syscall(sys_setresgid16); -cond_syscall(sys_setresuid16); -cond_syscall(sys_setreuid16); -cond_syscall(sys_setuid16); +COND_SYSCALL(chown16); +COND_SYSCALL(fchown16); +COND_SYSCALL(getegid16); +COND_SYSCALL(geteuid16); +COND_SYSCALL(getgid16); +COND_SYSCALL(getgroups16); +COND_SYSCALL(getresgid16); +COND_SYSCALL(getresuid16); +COND_SYSCALL(getuid16); +COND_SYSCALL(lchown16); +COND_SYSCALL(setfsgid16); +COND_SYSCALL(setfsuid16); +COND_SYSCALL(setgid16); +COND_SYSCALL(setgroups16); +COND_SYSCALL(setregid16); +COND_SYSCALL(setresgid16); +COND_SYSCALL(setresuid16); +COND_SYSCALL(setreuid16); +COND_SYSCALL(setuid16); -- cgit v1.2.3 From c9a211951c7c79cfb5de888d7d9550872868b086 Mon Sep 17 00:00:00 2001 From: Howard McLauchlan Date: Wed, 21 Mar 2018 18:59:08 -0700 Subject: bpf: whitelist all syscalls for error injection Error injection is a useful mechanism to fail arbitrary kernel functions. However, it is often hard to guarantee an error propagates appropriately to user space programs. By injecting into syscalls, we can return arbitrary values to user space directly; this increases flexibility and robustness in testing, allowing us to test user space error paths effectively. 
The following script, for example, fails calls to sys_open() from a given pid: from bcc import BPF from sys import argv pid = argv[1] prog = r""" int kprobe__SyS_open(struct pt_regs *ctx, const char *pathname, int flags) { u32 pid = bpf_get_current_pid_tgid(); if (pid == %s) bpf_override_return(ctx, -ENOMEM); return 0; } """ % pid b = BPF(text=prog) while 1: b.perf_buffer_poll() This patch whitelists all syscalls defined with SYSCALL_DEFINE and COMPAT_SYSCALL_DEFINE for error injection. These changes are not intended to be considered stable, and would normally be configured off. Signed-off-by: Howard McLauchlan Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 3 +++ include/linux/syscalls.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index 8cb8710db0ab..9847c5a013c3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -33,6 +33,8 @@ #endif #define COMPAT_SYSCALL_DEFINE0(name) \ + asmlinkage long compat_sys_##name(void); \ + ALLOW_ERROR_INJECTION(compat_sys_##name, ERRNO); \ asmlinkage long compat_sys_##name(void) #define COMPAT_SYSCALL_DEFINE1(name, ...) 
\ @@ -52,6 +54,7 @@ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ __attribute__((alias(__stringify(compat_SyS##name)))); \ + ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1f223b7cf16d..b961184f597a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -191,6 +191,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long sys_##sname(void); \ + ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ asmlinkage long sys_##sname(void) #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) @@ -210,6 +212,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(SyS##name)))); \ + ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ -- cgit v1.2.3