summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/userfaultfd.c35
1 files changed, 32 insertions, 3 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index af88ef6fffff..a14d63e945f4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -262,7 +262,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq;
int ret;
- bool must_wait;
+ bool must_wait, return_to_userland;
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
@@ -327,6 +327,9 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
uwq.msg = userfault_msg(address, flags, reason);
uwq.ctx = ctx;
+ return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+ (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+
spin_lock(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
@@ -338,14 +341,16 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
- set_current_state(TASK_KILLABLE);
+ set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
+ TASK_KILLABLE);
spin_unlock(&ctx->fault_pending_wqh.lock);
must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
- !fatal_signal_pending(current))) {
+ (return_to_userland ? !signal_pending(current) :
+ !fatal_signal_pending(current)))) {
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
@@ -353,6 +358,30 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
__set_current_state(TASK_RUNNING);
+ if (return_to_userland) {
+ if (signal_pending(current) &&
+ !fatal_signal_pending(current)) {
+ /*
+ * If we got a SIGSTOP or SIGCONT and this is
+ * a normal userland page fault, just let
+ * userland return so the signal will be
+ * handled and gdb debugging works. The page
+ * fault code immediately after we return from
+ * this function is going to release the
+ * mmap_sem and it's not depending on it
+ * (unlike gup would if we were not to return
+ * VM_FAULT_RETRY).
+ *
+ * If a fatal signal is pending we still take
+ * the streamlined VM_FAULT_RETRY failure path
+ * and there's no need to retake the mmap_sem
+ * in such case.
+ */
+ down_read(&mm->mmap_sem);
+ ret = 0;
+ }
+ }
+
/*
* Here we race with the list_del; list_add in
* userfaultfd_ctx_read(), however because we don't ever run