summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-19 21:34:15 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-07-01 14:16:25 +0300
commitc4b31d1b694e101cae7469a20762647185e11721 (patch)
tree69e8ae10e0b1e986ccffac7817e9f71c9cc7b7d5
parent6a6b5616c3d04eba12dd0abc0522e5bae5f1ee5a (diff)
downloadlinux-c4b31d1b694e101cae7469a20762647185e11721.tar.xz
execve: expand new process stack manually ahead of time
commit f313c51d26aa87e69633c9b46efb37a930faca71 upstream. This is a small step towards a model where GUP itself would not expand the stack, and any user that needs GUP to not look up existing mappings, but actually expand on them, would have to do so manually before-hand, and with the mm lock held for writing. It turns out that execve() already did almost exactly that, except it didn't take the mm lock at all (it's single-threaded so no locking technically needed, but it could cause lockdep errors). And it only did it for the CONFIG_STACK_GROWSUP case, since in that case GUP has obviously never expanded the stack downwards. So just make that CONFIG_STACK_GROWSUP case do the right thing with locking, and enable it generally. This will eventually help GUP, and in the meantime avoids a special case and the lockdep issue. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> [6.1 Minor context from still having FOLL_FORCE flags set] Signed-off-by: Samuel Mendoza-Jonas <samjonas@amazon.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--fs/exec.c37
1 files changed, 21 insertions, 16 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 7da70211b25e..0d20f7d08696 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -198,34 +198,39 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
+ struct vm_area_struct *vma = bprm->vma;
+ struct mm_struct *mm = bprm->mm;
int ret;
- unsigned int gup_flags = FOLL_FORCE;
-#ifdef CONFIG_STACK_GROWSUP
- if (write) {
- /* We claim to hold the lock - nobody to race with */
- ret = expand_downwards(bprm->vma, pos, true);
- if (ret < 0)
+ /*
+ * Avoid relying on expanding the stack down in GUP (which
+ * does not work for STACK_GROWSUP anyway), and just do it
+ * by hand ahead of time.
+ */
+ if (write && pos < vma->vm_start) {
+ mmap_write_lock(mm);
+ ret = expand_downwards(vma, pos, true);
+ if (unlikely(ret < 0)) {
+ mmap_write_unlock(mm);
return NULL;
- }
-#endif
-
- if (write)
- gup_flags |= FOLL_WRITE;
+ }
+ mmap_write_downgrade(mm);
+ } else
+ mmap_read_lock(mm);
/*
* We are doing an exec(). 'current' is the process
- * doing the exec and bprm->mm is the new process's mm.
+ * doing the exec and 'mm' is the new process's mm.
*/
- mmap_read_lock(bprm->mm);
- ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
+ ret = get_user_pages_remote(mm, pos, 1,
+ write ? FOLL_WRITE : 0,
&page, NULL, NULL);
- mmap_read_unlock(bprm->mm);
+ mmap_read_unlock(mm);
if (ret <= 0)
return NULL;
if (write)
- acct_arg_size(bprm, vma_pages(bprm->vma));
+ acct_arg_size(bprm, vma_pages(vma));
return page;
}