From 0ab32b6f1b88444524e52429fab334ff96683a3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Nov 2015 18:46:03 -0800 Subject: uaccess: reimplement probe_kernel_address() using probe_kernel_read() probe_kernel_address() is basically the same as the (later added) probe_kernel_read(). The return value on EFAULT is a bit different: probe_kernel_address() returns number-of-bytes-not-copied whereas probe_kernel_read() returns -EFAULT. All callers have been checked, none cared. probe_kernel_read() can be overridden by the architecture whereas probe_kernel_address() cannot. parisc, blackfin and um do this, to insert additional checking. Hence this patch possibly fixes obscure bugs, although there are only two probe_kernel_address() callsites outside arch/. My first attempt involved removing probe_kernel_address() entirely and converting all callsites to use probe_kernel_read() directly, but that got tiresome. This patch shrinks mm/slab_common.o by 218 bytes. For a single probe_kernel_address() callsite. Cc: Steven Miao Cc: Jeff Dike Cc: Richard Weinberger Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/alignment.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 00b7f7de28a1..7d5f4c736a16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -803,7 +803,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address(instrptr, instr); + fault = probe_kernel_address((void *)instrptr, instr); instr = __mem_to_opcode_arm(instr); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index ebc1f412cf49..13b9bcf5485e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -999,7 +999,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = get_user(regs->nip, &inst); pagefault_enable(); } else { - ret = probe_kernel_address(regs->nip, inst); + ret = probe_kernel_address((void *)regs->nip, inst); } if (mcheck_handle_load(regs, inst)) { -- cgit v1.2.3 From c118baf802562688d46e6002f2b5fe66b947da21 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Thu, 5 Nov 2015 18:46:29 -0800 Subject: arch/powerpc/mm/numa.c: do not allocate bootmem memory for non existing nodes With the setup_nr_nodes(), we have already initialized node_possible_map. So it is safe to use for_each_node here. There are many places in the kernel that use hardcoded 'for' loop with nr_node_ids, because all other architectures have numa nodes populated serially. That should be reason we had maintained the same for powerpc. But, since sparse numa node ids possible on powerpc, we unnecessarily allocate memory for non existent numa nodes. For e.g., on a system with 0,1,16,17 as numa nodes nr_node_ids=18 and we allocate memory for nodes 2-14. This patch we allocate memory for only existing numa nodes. The patch is boot tested on a 4 node tuleta, confirming with printks that it works as expected. Signed-off-by: Raghavendra K T Cc: Vladimir Davydov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Anton Blanchard Cc: Nishanth Aravamudan Cc: Greg Kurz Cc: Grant Likely Cc: Nikunj A Dadhania Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 8b9502adaf79..8d8a541211d0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -80,7 +80,7 @@ static void __init setup_node_to_cpumask_map(void) setup_nr_node_ids(); /* allocate the map */ - for (node = 0; node < nr_node_ids; node++) + for_each_node(node) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ -- cgit v1.2.3 From 25add7ec708170e4eaef1f9793a07803b2fb5c71 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 5 Nov 2015 18:51:03 -0800 Subject: kasan: update log messages We decided to use KASAN as the short name of the tool and KernelAddressSanitizer as the full one. Update log messages according to that. Signed-off-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Konstantin Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kasan_init_64.c | 2 +- mm/kasan/kasan.c | 2 +- mm/kasan/report.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ce5da27b136..d470cf219a2d 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -126,5 +126,5 @@ void __init kasan_init(void) __flush_tlb_all(); init_task.kasan_depth = 0; - pr_info("Kernel address sanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 1104cb0171e2..be9b78d7c2c4 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -518,7 +518,7 @@ static int kasan_mem_notifier(struct notifier_block *nb, static int __init kasan_memhotplug_init(void) { - pr_err("WARNING: KASan doesn't support memory hot-add\n"); + pr_err("WARNING: KASAN doesn't support memory hot-add\n"); pr_err("Memory hot-add will be disabled\n"); hotplug_memory_notifier(kasan_mem_notifier, 0); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index be53a8f9e5dd..ae6bd3697c91 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -91,7 +91,7 @@ static void print_error_description(struct kasan_access_info *info) break; } - pr_err("BUG: KASan: %s in %pS at addr %p\n", + pr_err("BUG: KASAN: %s in %pS at addr %p\n", bug_type, (void *)info->ip, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", @@ -224,7 +224,7 @@ static void kasan_report_error(struct kasan_access_info *info) bug_type = "user-memory-access"; else bug_type = "wild-memory-access"; - pr_err("BUG: KASan: %s on address %p\n", + pr_err("BUG: KASAN: %s on address %p\n", bug_type, info->access_addr); pr_err("%s of size %zu by task %s/%d\n", info->is_write ? "Write" : "Read", -- cgit v1.2.3 From c63f06dd15797736c31dc86e6392811d0bac34a1 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 5 Nov 2015 18:51:09 -0800 Subject: kasan: move KASAN_SANITIZE in arch/x86/boot/Makefile Move KASAN_SANITIZE in arch/x86/boot/Makefile above the comment related to SVGA_MODE, since the comment refers to 'the next line'. Signed-off-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Konstantin Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 0d553e54171b..2ee62dba0373 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -9,13 +9,13 @@ # Changed by many, many contributors over the years. # +KASAN_SANITIZE := n + # If you want to preset the SVGA mode, uncomment the next line and # set SVGA_MODE to whatever number you want. # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup. -KASAN_SANITIZE := n - SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets := vmlinux.bin setup.bin setup.elf bzImage -- cgit v1.2.3 From a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:33 -0800 Subject: mm: mlock: add new mlock system call With the refactored mlock code, introduce a new system call for mlock. The new call will allow the user to specify what lock states are being added. mlock2 is trivial at the moment, but a follow on patch will add a new mlock state making it useful. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Heiko Carstens Cc: Geert Uytterhoeven Cc: Catalin Marinas Cc: Stephen Rothwell Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 1 + mm/mlock.c | 8 ++++++++ 6 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index caa2c712d1e7..f17705e1332c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -382,3 +382,4 @@ 373 i386 shutdown sys_shutdown 374 i386 userfaultfd sys_userfaultfd 375 i386 membarrier sys_membarrier +376 i386 mlock2 sys_mlock2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 278842fdf1f6..314a90bfc09c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -331,6 +331,7 @@ 322 64 execveat stub_execveat 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a460e2ef2843..a156b82dd14c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -887,4 +887,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index ee124009e12a..1324b0292ec2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -713,9 +713,11 @@ __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) __SYSCALL(__NR_userfaultfd, sys_userfaultfd) #define __NR_membarrier 283 __SYSCALL(__NR_membarrier, sys_membarrier) +#define __NR_mlock2 284 +__SYSCALL(__NR_mlock2, sys_mlock2) #undef __NR_syscalls -#define __NR_syscalls 284 +#define __NR_syscalls 285 /* * All syscalls below here should go away really, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a02decf15583..0623787ec67a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -194,6 +194,7 @@ cond_syscall(sys_mlock); cond_syscall(sys_munlock); cond_syscall(sys_mlockall); cond_syscall(sys_munlockall); +cond_syscall(sys_mlock2); cond_syscall(sys_mincore); cond_syscall(sys_madvise); cond_syscall(sys_mremap); diff --git a/mm/mlock.c b/mm/mlock.c index fbd8c03f7b37..35dcf8fa7195 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -644,6 +644,14 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) return do_mlock(start, len, VM_LOCKED); } +SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) +{ + if (flags) + return -EINVAL; + + return do_mlock(start, len, VM_LOCKED); +} + SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; -- cgit v1.2.3 From b0f205c2a3082dd9081f9a94e50658c5fa906ff1 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:39 -0800 Subject: mm: mlock: add mlock flags to enable VM_LOCKONFAULT usage The previous patch introduced a flag that specified pages in a VMA should be placed on the unevictable LRU, but they should not be made present when the area is created. This patch adds the ability to set this state via the new mlock system calls. We add MLOCK_ONFAULT for mlock2 and MCL_ONFAULT for mlockall. MLOCK_ONFAULT will set the VM_LOCKONFAULT modifier for VM_LOCKED. MCL_ONFAULT should be used as a modifier to the two other mlockall flags. When used with MCL_CURRENT, all current mappings will be marked with VM_LOCKED | VM_LOCKONFAULT. When used with MCL_FUTURE, the mm->def_flags will be marked with VM_LOCKED | VM_LOCKONFAULT. When used with both MCL_CURRENT and MCL_FUTURE, all current mappings and mm->def_flags will be marked with VM_LOCKED | VM_LOCKONFAULT. Prior to this patch, mlockall() will unconditionally clear the mm->def_flags any time it is called without MCL_FUTURE. This behavior is maintained after adding MCL_ONFAULT. If a call to mlockall(MCL_FUTURE) is followed by mlockall(MCL_CURRENT), the mm->def_flags will be cleared and new VMAs will be unlocked. This remains true with or without MCL_ONFAULT in either mlockall() invocation. munlock() will unconditionally clear both vma flags. munlockall() unconditionally clears for VMA flags on all VMAs and in the mm->def_flags field. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Guenter Roeck Cc: Heiko Carstens Cc: Kirill A. Shutemov Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/uapi/asm/mman.h | 3 ++ arch/mips/include/uapi/asm/mman.h | 6 ++++ arch/parisc/include/uapi/asm/mman.h | 3 ++ arch/powerpc/include/uapi/asm/mman.h | 1 + arch/sparc/include/uapi/asm/mman.h | 1 + arch/tile/include/uapi/asm/mman.h | 1 + arch/xtensa/include/uapi/asm/mman.h | 6 ++++ include/uapi/asm-generic/mman-common.h | 5 ++++ include/uapi/asm-generic/mman.h | 1 + mm/mlock.c | 51 +++++++++++++++++++++++++--------- 10 files changed, 65 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 0086b472bc2b..f2f949671798 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -37,6 +37,9 @@ #define MCL_CURRENT 8192 /* lock all currently mapped pages */ #define MCL_FUTURE 16384 /* lock all additions to address space */ +#define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index cfcb876cae6b..97c03f468924 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -61,6 +61,12 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 294d251ca7b2..ecc3ae1ca28e 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -31,6 +31,9 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 6ea26df0a73c..03c06ba7464f 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -22,6 +22,7 @@ #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index 0b14df33cffa..9765896ecb2c 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -17,6 +17,7 @@ #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ diff --git a/arch/tile/include/uapi/asm/mman.h b/arch/tile/include/uapi/asm/mman.h index 81b8fc348d63..63ee13faf17d 100644 --- a/arch/tile/include/uapi/asm/mman.h +++ b/arch/tile/include/uapi/asm/mman.h @@ -36,6 +36,7 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ #endif /* _ASM_TILE_MMAN_H */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 201aec0e0446..360944e1da52 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -74,6 +74,12 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index ddc3b36f1046..a74dd84bbb6d 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -25,6 +25,11 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ #endif +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_INVALIDATE 2 /* invalidate the caches */ #define MS_SYNC 4 /* synchronous memory sync */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index e9fe6fd2a074..7162cd4cca73 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -17,5 +17,6 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ #endif /* __ASM_GENERIC_MMAN_H */ diff --git a/mm/mlock.c b/mm/mlock.c index ca3894113b97..339d9e0949b6 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -506,7 +506,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) - goto out; /* don't set VM_LOCKED, don't count */ + /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ + goto out; pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, @@ -577,7 +578,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len, prev = vma; for (nstart = start ; ; ) { - vm_flags_t newflags = vma->vm_flags & ~VM_LOCKED; + vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; newflags |= flags; @@ -646,10 +647,15 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) { - if (flags) + vm_flags_t vm_flags = VM_LOCKED; + + if (flags & ~MLOCK_ONFAULT) return -EINVAL; - return do_mlock(start, len, VM_LOCKED); + if (flags & MLOCK_ONFAULT) + vm_flags |= VM_LOCKONFAULT; + + return do_mlock(start, len, vm_flags); } SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) @@ -666,24 +672,43 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) return ret; } +/* + * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) + * and translate into the appropriate modifications to mm->def_flags and/or the + * flags for all current VMAs. + * + * There are a couple of subtleties with this. If mlockall() is called multiple + * times with different flags, the values do not necessarily stack. If mlockall + * is called once including the MCL_FUTURE flag and then a second time without + * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. + */ static int apply_mlockall_flags(int flags) { struct vm_area_struct * vma, * prev = NULL; + vm_flags_t to_add = 0; - if (flags & MCL_FUTURE) + current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; + if (flags & MCL_FUTURE) { current->mm->def_flags |= VM_LOCKED; - else - current->mm->def_flags &= ~VM_LOCKED; - if (flags == MCL_FUTURE) - goto out; + if (flags & MCL_ONFAULT) + current->mm->def_flags |= VM_LOCKONFAULT; + + if (!(flags & MCL_CURRENT)) + goto out; + } + + if (flags & MCL_CURRENT) { + to_add |= VM_LOCKED; + if (flags & MCL_ONFAULT) + to_add |= VM_LOCKONFAULT; + } for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { vm_flags_t newflags; - newflags = vma->vm_flags & ~VM_LOCKED; - if (flags & MCL_CURRENT) - newflags |= VM_LOCKED; + newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; + newflags |= to_add; /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); @@ -698,7 +723,7 @@ SYSCALL_DEFINE1(mlockall, int, flags) unsigned long lock_limit; int ret; - if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) + if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT))) return -EINVAL; if (!can_do_mlock()) -- cgit v1.2.3