From a8f072c1d624a627b67f2ace2f0c25d856ef4e54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:13:59 +0200 Subject: job control: rename signal->group_stop and flags to jobctl and update them signal->group_stop currently hosts mostly group stop related flags; however, it's gonna be used for wider purposes and the GROUP_STOP_ flag prefix becomes confusing. Rename signal->group_stop to signal->jobctl and rename all GROUP_STOP_* flags to JOBCTL_*. Bit position macros JOBCTL_*_BIT are defined and JOBCTL_* flags are defined in terms of them to allow using bitops later. While at it, reassign JOBCTL_TRAPPING to bit 22 to better accomodate future additions. This doesn't cause any functional change. -v2: JOBCTL_*_BIT macros added as suggested by Linus. Signed-off-by: Tejun Heo Cc: Linus Torvalds Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index ea5f748906a8..8986bb0f9dc2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_group_stop_pending(t); + task_clear_jobctl_stop_pending(t); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); -- cgit v1.2.3 From 3759a0d94c18764247b66511d1038f2b93aa95de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:14:00 +0200 Subject: job control: introduce JOBCTL_PENDING_MASK and task_clear_jobctl_pending() This patch introduces JOBCTL_PENDING_MASK and replaces task_clear_jobctl_stop_pending() with task_clear_jobctl_pending() which takes an extra @mask argument. JOBCTL_PENDING_MASK is currently equal to JOBCTL_STOP_PENDING but future patches will add more bits. recalc_sigpending_tsk() is updated to use JOBCTL_PENDING_MASK instead. task_clear_jobctl_pending() takes @mask which in subset of JOBCTL_PENDING_MASK and clears the relevant jobctl bits. If JOBCTL_STOP_PENDING is set, other STOP bits are cleared together. All task_clear_jobctl_stop_pending() users are updated to call task_clear_jobctl_pending() with JOBCTL_STOP_PENDING which is functionally identical to task_clear_jobctl_stop_pending(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- include/linux/sched.h | 5 ++++- kernel/signal.c | 27 +++++++++++++++++---------- 3 files changed, 22 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 8986bb0f9dc2..4402105287cb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_jobctl_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/include/linux/sched.h b/include/linux/sched.h index b0dd064eb4fc..5a958b17f9fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1817,7 +1817,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) -extern void task_clear_jobctl_stop_pending(struct task_struct *task); +#define JOBCTL_PENDING_MASK JOBCTL_STOP_PENDING + +extern void task_clear_jobctl_pending(struct task_struct *task, + unsigned int mask); #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/signal.c b/kernel/signal.c index 62a6c3bb9f0d..288d952fa3b8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) static int recalc_sigpending_tsk(struct task_struct *t) { - if ((t->jobctl & JOBCTL_STOP_PENDING) || + if ((t->jobctl & JOBCTL_PENDING_MASK) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); @@ -245,18 +245,25 @@ static void task_clear_jobctl_trapping(struct task_struct *task) } /** - * task_clear_jobctl_stop_pending - clear pending group stop + * task_clear_jobctl_pending - clear jobctl pending bits * @task: target task + * @mask: pending bits to clear * - * Clear group stop states for @task. + * Clear @mask from @task->jobctl. @mask must be subset of + * %JOBCTL_PENDING_MASK. If %JOBCTL_STOP_PENDING is being cleared, other + * STOP bits are cleared together. * * CONTEXT: * Must be called with @task->sighand->siglock held. */ -void task_clear_jobctl_stop_pending(struct task_struct *task) +void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask) { - task->jobctl &= ~(JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME | - JOBCTL_STOP_DEQUEUED); + BUG_ON(mask & ~JOBCTL_PENDING_MASK); + + if (mask & JOBCTL_STOP_PENDING) + mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; + + task->jobctl &= ~mask; } /** @@ -282,7 +289,7 @@ static bool task_participate_group_stop(struct task_struct *task) WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); - task_clear_jobctl_stop_pending(task); + task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); if (!consume) return false; @@ -810,7 +817,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); t = p; do { - task_clear_jobctl_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); wake_up_state(t, __TASK_STOPPED); } while_each_thread(p, t); @@ -926,7 +933,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { - task_clear_jobctl_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1161,7 +1168,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { - task_clear_jobctl_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); count++; /* Don't bother with already dead threads */ -- cgit v1.2.3 From 6dfca32984237a8a011b5bf367e53341a265b2a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:14:00 +0200 Subject: job control: make task_clear_jobctl_pending() clear TRAPPING automatically JOBCTL_TRAPPING indicates that ptracer is waiting for tracee to (re)transit into TRACED. task_clear_jobctl_pending() must be called when either tracee enters TRACED or the transition is cancelled for some reason. The former is achieved by explicitly calling task_clear_jobctl_pending() in ptrace_stop() and the latter by calling it at the end of do_signal_stop(). Calling task_clear_jobctl_trapping() at the end of do_signal_stop() limits the scope TRAPPING can be used and is fragile in that seemingly unrelated changes to tracee's control flow can lead to stuck TRAPPING. We already have task_clear_jobctl_pending() calls on those cancelling events to clear JOBCTL_STOP_PENDING. Cancellations can be handled by making those call sites use JOBCTL_PENDING_MASK instead and updating task_clear_jobctl_pending() such that task_clear_jobctl_trapping() is called automatically if no stop/trap is pending. This patch makes the above changes and removes the fallback task_clear_jobctl_trapping() call from do_signal_stop(). Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- kernel/signal.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 4402105287cb..a9f2b3631bdb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/kernel/signal.c b/kernel/signal.c index 288d952fa3b8..637a171b65b6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -253,6 +253,9 @@ static void task_clear_jobctl_trapping(struct task_struct *task) * %JOBCTL_PENDING_MASK. If %JOBCTL_STOP_PENDING is being cleared, other * STOP bits are cleared together. * + * If clearing of @mask leaves no stop or trap pending, this function calls + * task_clear_jobctl_trapping(). + * * CONTEXT: * Must be called with @task->sighand->siglock held. */ @@ -264,6 +267,9 @@ void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask) mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; task->jobctl &= ~mask; + + if (!(task->jobctl & JOBCTL_PENDING_MASK)) + task_clear_jobctl_trapping(task); } /** @@ -933,7 +939,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { - task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1168,7 +1174,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { - task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); count++; /* Don't bother with already dead threads */ @@ -1964,9 +1970,6 @@ retry: goto retry; } - /* PTRACE_ATTACH might have raced with task killing, clear trapping */ - task_clear_jobctl_trapping(current); - spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); -- cgit v1.2.3 From a288eecce5253cc1565d400a52b9b476a157e040 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:37 +0200 Subject: ptrace: kill trivial tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following trivial tracehooks. * Ones testing whether task is ptraced. Replace with ->ptrace test. tracehook_expect_breakpoints() tracehook_consider_ignored_signal() tracehook_consider_fatal_signal() * ptrace_event() wrappers. Call directly. tracehook_report_exec() tracehook_report_exit() tracehook_report_vfork_done() * ptrace_release_task() wrapper. Call directly. tracehook_finish_release_task() * noop tracehook_prepare_release_task() tracehook_report_death() This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Martin Schwidefsky Signed-off-by: Oleg Nesterov --- arch/s390/kernel/traps.c | 4 +- fs/exec.c | 2 +- include/linux/tracehook.h | 156 ---------------------------------------------- kernel/exit.c | 7 +-- kernel/fork.c | 2 +- kernel/signal.c | 8 +-- mm/nommu.c | 3 +- 7 files changed, 11 insertions(+), 171 deletions(-) (limited to 'fs') diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a65d2e82f61d..a63d34c3611e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -331,7 +331,7 @@ void __kprobes do_per_trap(struct pt_regs *regs) { if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; - if (tracehook_consider_fatal_signal(current, SIGTRAP)) + if (current->ptrace) force_sig(SIGTRAP, current); } @@ -425,7 +425,7 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (tracehook_consider_fatal_signal(current, SIGTRAP)) + if (current->ptrace) force_sig(SIGTRAP, current); else signal = SIGILL; diff --git a/fs/exec.c b/fs/exec.c index a9f2b3631bdb..b37030d0a50b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1384,7 +1384,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - tracehook_report_exec(fmt, bprm, regs); + ptrace_event(PTRACE_EVENT_EXEC, 0); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3b68aa842a92..8b06d4f2b814 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -51,21 +51,6 @@ #include struct linux_binprm; -/** - * tracehook_expect_breakpoints - guess if task memory might be touched - * @task: current task, making a new mapping - * - * Return nonzero if @task is expected to want breakpoint insertion in - * its memory at some point. A zero return is no guarantee it won't - * be done, but this is a hint that it's known to be likely. - * - * May be called with @task->mm->mmap_sem held for writing. - */ -static inline int tracehook_expect_breakpoints(struct task_struct *task) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - /* * ptrace report for syscall entry and exit looks identical. */ @@ -183,42 +168,6 @@ static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) return NULL; } -/** - * tracehook_report_exec - a successful exec was completed - * @fmt: &struct linux_binfmt that performed the exec - * @bprm: &struct linux_binprm containing exec details - * @regs: user-mode register state - * - * An exec just completed, we are shortly going to return to user mode. - * The freshly initialized register state can be seen and changed in @regs. - * The name, file and other pointers in @bprm are still on hand to be - * inspected, but will be freed as soon as this returns. - * - * Called with no locks, but with some kernel resources held live - * and a reference on @fmt->module. - */ -static inline void tracehook_report_exec(struct linux_binfmt *fmt, - struct linux_binprm *bprm, - struct pt_regs *regs) -{ - ptrace_event(PTRACE_EVENT_EXEC, 0); -} - -/** - * tracehook_report_exit - task has begun to exit - * @exit_code: pointer to value destined for @current->exit_code - * - * @exit_code points to the value passed to do_exit(), which tracing - * might change here. This is almost the first thing in do_exit(), - * before freeing any resources or setting the %PF_EXITING flag. - * - * Called with no locks held. - */ -static inline void tracehook_report_exit(long *exit_code) -{ - ptrace_event(PTRACE_EVENT_EXIT, *exit_code); -} - /** * tracehook_prepare_clone - prepare for new child to be cloned * @clone_flags: %CLONE_* flags from clone/fork/vfork system call @@ -319,52 +268,6 @@ static inline void tracehook_report_clone_complete(int trace, ptrace_event(trace, pid); } -/** - * tracehook_report_vfork_done - vfork parent's child has exited or exec'd - * @child: child task, already running - * @pid: new child's PID in the parent's namespace - * - * Called after a %CLONE_VFORK parent has waited for the child to complete. - * The clone/vfork system call will return immediately after this. - * The @child pointer may be invalid if a self-reaping child died and - * tracehook_report_clone() took no action to prevent it from self-reaping. - * - * Called with no locks held. - */ -static inline void tracehook_report_vfork_done(struct task_struct *child, - pid_t pid) -{ - ptrace_event(PTRACE_EVENT_VFORK_DONE, pid); -} - -/** - * tracehook_prepare_release_task - task is being reaped, clean up tracing - * @task: task in %EXIT_DEAD state - * - * This is called in release_task() just before @task gets finally reaped - * and freed. This would be the ideal place to remove and clean up any - * tracing-related state for @task. - * - * Called with no locks held. - */ -static inline void tracehook_prepare_release_task(struct task_struct *task) -{ -} - -/** - * tracehook_finish_release_task - final tracing clean-up - * @task: task in %EXIT_DEAD state - * - * This is called in release_task() when @task is being in the middle of - * being reaped. After this, there must be no tracing entanglements. - * - * Called with write_lock_irq(&tasklist_lock) held. - */ -static inline void tracehook_finish_release_task(struct task_struct *task) -{ - ptrace_release_task(task); -} - /** * tracehook_signal_handler - signal handler setup is complete * @sig: number of signal being delivered @@ -388,41 +291,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, ptrace_notify(SIGTRAP); } -/** - * tracehook_consider_ignored_signal - suppress short-circuit of ignored signal - * @task: task receiving the signal - * @sig: signal number being sent - * - * Return zero iff tracing doesn't care to examine this ignored signal, - * so it can short-circuit normal delivery and never even get queued. - * - * Called with @task->sighand->siglock held. - */ -static inline int tracehook_consider_ignored_signal(struct task_struct *task, - int sig) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - -/** - * tracehook_consider_fatal_signal - suppress special handling of fatal signal - * @task: task receiving the signal - * @sig: signal number being sent - * - * Return nonzero to prevent special handling of this termination signal. - * Normally handler for signal is %SIG_DFL. It can be %SIG_IGN if @sig is - * ignored, in which case force_sig() is about to reset it to %SIG_DFL. - * When this returns zero, this signal might cause a quick termination - * that does not give the debugger a chance to intercept the signal. - * - * Called with or without @task->sighand->siglock held. - */ -static inline int tracehook_consider_fatal_signal(struct task_struct *task, - int sig) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - #define DEATH_REAP -1 #define DEATH_DELAYED_GROUP_LEADER -2 @@ -457,30 +325,6 @@ static inline int tracehook_notify_death(struct task_struct *task, return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER; } -/** - * tracehook_report_death - task is dead and ready to be reaped - * @task: @current task now exiting - * @signal: return value from tracheook_notify_death() - * @death_cookie: value passed back from tracehook_notify_death() - * @group_dead: nonzero if this was the last thread in the group to die - * - * Thread has just become a zombie or is about to self-reap. If positive, - * @signal is the signal number just sent to the parent (usually %SIGCHLD). - * If @signal is %DEATH_REAP, this thread will self-reap. If @signal is - * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie. - * The @death_cookie was passed back by tracehook_notify_death(). - * - * If normal reaping is not inhibited, @task->exit_state might be changing - * in parallel. - * - * Called without locks. - */ -static inline void tracehook_report_death(struct task_struct *task, - int signal, void *death_cookie, - int group_dead) -{ -} - #ifdef TIF_NOTIFY_RESUME /** * set_notify_resume - cause tracehook_notify_resume() to be called diff --git a/kernel/exit.c b/kernel/exit.c index e5cc05644609..d49134a7f250 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -169,7 +169,6 @@ void release_task(struct task_struct * p) struct task_struct *leader; int zap_leader; repeat: - tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. But shut RCU-lockdep up */ rcu_read_lock(); @@ -179,7 +178,7 @@ repeat: proc_flush_task(p); write_lock_irq(&tasklist_lock); - tracehook_finish_release_task(p); + ptrace_release_task(p); __exit_signal(p); /* @@ -868,8 +867,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) wake_up_process(tsk->signal->group_exit_task); write_unlock_irq(&tasklist_lock); - tracehook_report_death(tsk, signal, cookie, group_dead); - /* If the process is dead, release it - nobody will wait for it */ if (signal == DEATH_REAP) release_task(tsk); @@ -924,7 +921,7 @@ NORET_TYPE void do_exit(long code) */ set_fs(USER_DS); - tracehook_report_exit(&code); + ptrace_event(PTRACE_EVENT_EXIT, code); validate_creds_for_do_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 0276c30401a0..d4f0dff9d617 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1527,7 +1527,7 @@ long do_fork(unsigned long clone_flags, freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); - tracehook_report_vfork_done(p, nr); + ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); } } else { nr = PTR_ERR(p); diff --git a/kernel/signal.c b/kernel/signal.c index 0f3370872506..1550aee34f42 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -87,7 +87,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) /* * Tracers may want to know about even ignored signals. */ - return !tracehook_consider_ignored_signal(t, sig); + return !t->ptrace; } /* @@ -493,7 +493,8 @@ int unhandled_signal(struct task_struct *tsk, int sig) return 1; if (handler != SIG_IGN && handler != SIG_DFL) return 0; - return !tracehook_consider_fatal_signal(tsk, sig); + /* if ptraced, let the tracer determine */ + return !tsk->ptrace; } /* @@ -981,8 +982,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) if (sig_fatal(p, sig) && !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || - !tracehook_consider_fatal_signal(t, sig))) { + (sig == SIGKILL || !t->ptrace)) { /* * This signal will be fatal to the whole group. */ diff --git a/mm/nommu.c b/mm/nommu.c index 1fd0c51b10a6..54ae707bdae8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -1087,7 +1086,7 @@ static unsigned long determine_vm_flags(struct file *file, * it's being traced - otherwise breakpoints set in it may interfere * with another untraced process */ - if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) + if ((flags & MAP_PRIVATE) && current->ptrace) vm_flags &= ~VM_MAYSHARE; return vm_flags; -- cgit v1.2.3 From 4b9d33e6d83cc05a8005a8f9a8b9677fa0f53626 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:38 +0200 Subject: ptrace: kill clone/exec tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following clone and exec related tracehooks. tracehook_prepare_clone() tracehook_finish_clone() tracehook_report_clone() tracehook_report_clone_complete() tracehook_unsafe_exec() The changes are mostly trivial - logic is moved to the caller and comments are merged and adjusted appropriately. The only exception is in check_unsafe_exec() where LSM_UNSAFE_PTRACE* are OR'd to bprm->unsafe instead of setting it, which produces the same result as the field is always zero on entry. It also tests p->ptrace instead of (p->ptrace & PT_PTRACED) for consistency, which also gives the same result. This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Oleg Nesterov --- fs/exec.c | 7 ++- include/linux/tracehook.h | 121 ---------------------------------------------- kernel/fork.c | 41 ++++++++++++---- 3 files changed, 38 insertions(+), 131 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index b37030d0a50b..8dca45b0dae8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1224,7 +1224,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; int res = 0; - bprm->unsafe = tracehook_unsafe_exec(p); + if (p->ptrace) { + if (p->ptrace & PT_PTRACE_CAP) + bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; + else + bprm->unsafe |= LSM_UNSAFE_PTRACE; + } n_fs = 1; spin_lock(&p->fs->lock); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 8b06d4f2b814..bcc4ca762aee 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -129,27 +129,6 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) ptrace_report_syscall(regs); } -/** - * tracehook_unsafe_exec - check for exec declared unsafe due to tracing - * @task: current task doing exec - * - * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. - * - * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). - */ -static inline int tracehook_unsafe_exec(struct task_struct *task) -{ - int unsafe = 0; - int ptrace = task->ptrace; - if (ptrace & PT_PTRACED) { - if (ptrace & PT_PTRACE_CAP) - unsafe |= LSM_UNSAFE_PTRACE_CAP; - else - unsafe |= LSM_UNSAFE_PTRACE; - } - return unsafe; -} - /** * tracehook_tracer_task - return the task that is tracing the given task * @tsk: task to consider @@ -168,106 +147,6 @@ static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) return NULL; } -/** - * tracehook_prepare_clone - prepare for new child to be cloned - * @clone_flags: %CLONE_* flags from clone/fork/vfork system call - * - * This is called before a new user task is to be cloned. - * Its return value will be passed to tracehook_finish_clone(). - * - * Called with no locks held. - */ -static inline int tracehook_prepare_clone(unsigned clone_flags) -{ - int event = 0; - - if (clone_flags & CLONE_UNTRACED) - return 0; - - if (clone_flags & CLONE_VFORK) - event = PTRACE_EVENT_VFORK; - else if ((clone_flags & CSIGNAL) != SIGCHLD) - event = PTRACE_EVENT_CLONE; - else - event = PTRACE_EVENT_FORK; - - return ptrace_event_enabled(current, event) ? event : 0; -} - -/** - * tracehook_finish_clone - new child created and being attached - * @child: new child task - * @clone_flags: %CLONE_* flags from clone/fork/vfork system call - * @trace: return value from tracehook_prepare_clone() - * - * This is called immediately after adding @child to its parent's children list. - * The @trace value is that returned by tracehook_prepare_clone(). - * - * Called with current's siglock and write_lock_irq(&tasklist_lock) held. - */ -static inline void tracehook_finish_clone(struct task_struct *child, - unsigned long clone_flags, int trace) -{ - ptrace_init_task(child, (clone_flags & CLONE_PTRACE) || trace); -} - -/** - * tracehook_report_clone - in parent, new child is about to start running - * @regs: parent's user register state - * @clone_flags: flags from parent's system call - * @pid: new child's PID in the parent's namespace - * @child: new child task - * - * Called after a child is set up, but before it has been started running. - * This is not a good place to block, because the child has not started - * yet. Suspend the child here if desired, and then block in - * tracehook_report_clone_complete(). This must prevent the child from - * self-reaping if tracehook_report_clone_complete() uses the @child - * pointer; otherwise it might have died and been released by the time - * tracehook_report_clone_complete() is called. - * - * Called with no locks held, but the child cannot run until this returns. - */ -static inline void tracehook_report_clone(struct pt_regs *regs, - unsigned long clone_flags, - pid_t pid, struct task_struct *child) -{ - if (unlikely(child->ptrace)) { - /* - * It doesn't matter who attached/attaching to this - * task, the pending SIGSTOP is right in any case. - */ - sigaddset(&child->pending.signal, SIGSTOP); - set_tsk_thread_flag(child, TIF_SIGPENDING); - } -} - -/** - * tracehook_report_clone_complete - new child is running - * @trace: return value from tracehook_prepare_clone() - * @regs: parent's user register state - * @clone_flags: flags from parent's system call - * @pid: new child's PID in the parent's namespace - * @child: child task, already running - * - * This is called just after the child has started running. This is - * just before the clone/fork syscall returns, or blocks for vfork - * child completion if @clone_flags has the %CLONE_VFORK bit set. - * The @child pointer may be invalid if a self-reaping child died and - * tracehook_report_clone() took no action to prevent it from self-reaping. - * - * Called with no locks held. - */ -static inline void tracehook_report_clone_complete(int trace, - struct pt_regs *regs, - unsigned long clone_flags, - pid_t pid, - struct task_struct *child) -{ - if (unlikely(trace)) - ptrace_event(trace, pid); -} - /** * tracehook_signal_handler - signal handler setup is complete * @sig: number of signal being delivered diff --git a/kernel/fork.c b/kernel/fork.c index d4f0dff9d617..3c72a5b321a7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1340,7 +1340,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, } if (likely(p->pid)) { - tracehook_finish_clone(p, clone_flags, trace); + ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) @@ -1481,10 +1481,22 @@ long do_fork(unsigned long clone_flags, } /* - * When called from kernel_thread, don't do user tracing stuff. + * Determine whether and which event to report to ptracer. When + * called from kernel_thread or CLONE_UNTRACED is explicitly + * requested, no event is reported; otherwise, report if the event + * for the type of forking is enabled. */ - if (likely(user_mode(regs))) - trace = tracehook_prepare_clone(clone_flags); + if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { + if (clone_flags & CLONE_VFORK) + trace = PTRACE_EVENT_VFORK; + else if ((clone_flags & CSIGNAL) != SIGCHLD) + trace = PTRACE_EVENT_CLONE; + else + trace = PTRACE_EVENT_FORK; + + if (likely(!ptrace_event_enabled(current, trace))) + trace = 0; + } p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); @@ -1508,20 +1520,31 @@ long do_fork(unsigned long clone_flags, } audit_finish_fork(p); - tracehook_report_clone(regs, clone_flags, nr, p); + + /* + * Child is ready but hasn't started running yet. Queue + * SIGSTOP if it's gonna be ptraced - it doesn't matter who + * attached/attaching to this task, the pending SIGSTOP is + * right in any case. + */ + if (unlikely(p->ptrace)) { + sigaddset(&p->pending.signal, SIGSTOP); + set_tsk_thread_flag(p, TIF_SIGPENDING); + } /* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that - * hasn't gotten to tracehook_report_clone() yet. Now we - * clear it and set the child going. + * hasn't finished SIGSTOP raising yet. Now we clear it + * and set the child going. */ p->flags &= ~PF_STARTING; wake_up_new_task(p); - tracehook_report_clone_complete(trace, regs, - clone_flags, nr, p); + /* forking complete and child started to run, tell ptracer */ + if (unlikely(trace)) + ptrace_event(trace, nr); if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); -- cgit v1.2.3 From 06d984737bac0545fe20bb5447ee488b95adb531 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:40 +0200 Subject: ptrace: s/tracehook_tracer_task()/ptrace_parent()/ tracehook.h is on the way out. Rename tracehook_tracer_task() to ptrace_parent() and move it from tracehook.h to ptrace.h. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: John Johansen Cc: Stephen Smalley Signed-off-by: Oleg Nesterov --- fs/proc/array.c | 2 +- fs/proc/base.c | 2 +- include/linux/ptrace.h | 18 ++++++++++++++++++ include/linux/tracehook.h | 18 ------------------ security/apparmor/domain.c | 2 +- security/selinux/hooks.c | 4 ++-- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b45ee84fbcc..3a1dafd228d1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -172,7 +172,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tpid = 0; if (pid_alive(p)) { - struct task_struct *tracer = tracehook_tracer_task(p); + struct task_struct *tracer = ptrace_parent(p); if (tracer) tpid = task_pid_nr_ns(tracer, ns); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 14def991d9dd..c883dad74b9a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -216,7 +216,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task) if (task_is_stopped_or_traced(task)) { int match; rcu_read_lock(); - match = (tracehook_tracer_task(task) == current); + match = (ptrace_parent(task) == current); rcu_read_unlock(); if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) return mm; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b546fd6c3506..bb157bdd0c55 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -150,6 +150,24 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, unsigned long data); +/** + * ptrace_parent - return the task that is tracing the given task + * @task: task to consider + * + * Returns %NULL if no one is tracing @task, or the &struct task_struct + * pointer to its tracer. + * + * Must called under rcu_read_lock(). The pointer returned might be kept + * live only by RCU. During exec, this may be called with task_lock() held + * on @task, still held from when check_unsafe_exec() was called. + */ +static inline struct task_struct *ptrace_parent(struct task_struct *task) +{ + if (unlikely(task->ptrace)) + return rcu_dereference(task->parent); + return NULL; +} + /** * ptrace_event_enabled - test whether a ptrace event is enabled * @task: ptracee of interest diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index bcc4ca762aee..7a1bd12aeffa 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -129,24 +129,6 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) ptrace_report_syscall(regs); } -/** - * tracehook_tracer_task - return the task that is tracing the given task - * @tsk: task to consider - * - * Returns NULL if no one is tracing @task, or the &struct task_struct - * pointer to its tracer. - * - * Must called under rcu_read_lock(). The pointer returned might be kept - * live only by RCU. During exec, this may be called with task_lock() - * held on @task, still held from when tracehook_unsafe_exec() was called. - */ -static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) -{ - if (tsk->ptrace & PT_PTRACED) - return rcu_dereference(tsk->parent); - return NULL; -} - /** * tracehook_signal_handler - signal handler setup is complete * @sig: number of signal being delivered diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index c825c6e0b636..7312bf9f7afc 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -67,7 +67,7 @@ static int may_change_ptraced_domain(struct task_struct *task, int error = 0; rcu_read_lock(); - tracer = tracehook_tracer_task(task); + tracer = ptrace_parent(task); if (tracer) { /* released below */ cred = get_task_cred(tracer); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a0d38459d650..fc07d18ed6fc 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2048,7 +2048,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) u32 ptsid = 0; rcu_read_lock(); - tracer = tracehook_tracer_task(current); + tracer = ptrace_parent(current); if (likely(tracer != NULL)) { sec = __task_cred(tracer)->security; ptsid = sec->sid; @@ -5314,7 +5314,7 @@ static int selinux_setprocattr(struct task_struct *p, Otherwise, leave SID unchanged and fail. */ ptsid = 0; task_lock(p); - tracer = tracehook_tracer_task(p); + tracer = ptrace_parent(p); if (tracer) ptsid = task_sid(tracer); task_unlock(p); -- cgit v1.2.3 From 087806b1281563e4ae7a5bce3155f894af5f4118 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:10:26 +0200 Subject: redefine thread_group_leader() as exit_signal >= 0 Change de_thread() to set old_leader->exit_signal = -1. This is good for the consistency, it is no longer the leader and all sub-threads have exit_signal = -1 set by copy_process(CLONE_THREAD). And this allows us to micro-optimize thread_group_leader(), it can simply check exit_signal >= 0. This also makes sense because we should move ->group_leader from task_struct to signal_struct. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- fs/exec.c | 1 + include/linux/sched.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 8dca45b0dae8..c3d517bfdd27 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -963,6 +963,7 @@ static int de_thread(struct task_struct *tsk) leader->group_leader = tsk; tsk->exit_signal = SIGCHLD; + leader->exit_signal = -1; BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; diff --git a/include/linux/sched.h b/include/linux/sched.h index 39acee2c8929..b38ed51d5c64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2284,8 +2284,10 @@ static inline int get_nr_threads(struct task_struct *tsk) return tsk->signal->nr_threads; } -/* de_thread depends on thread_group_leader not being a pid based check */ -#define thread_group_leader(p) (p == p->group_leader) +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} /* Do to the insanities of de_thread it is possible for a process * to have the pid of the thread group leader without actually being -- cgit v1.2.3 From bb188d7e64deb0e9cf13a99f44ae0065de5352d6 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 29 Jun 2011 04:13:39 +0200 Subject: ptrace: make former thread ID available via PTRACE_GETEVENTMSG after PTRACE_EVENT_EXEC stop When multithreaded program execs under ptrace, all traced threads report WIFEXITED status, except for thread group leader and the thread which execs. Unless tracer tracks thread group relationship between tracees, which is a nontrivial task, it will not detect that execed thread no longer exists. This patch allows tracer to figure out which thread performed this exec, by requesting PTRACE_GETEVENTMSG in PTRACE_EVENT_EXEC stop. Another, samller problem which is solved by this patch is that tracer now can figure out which of the several concurrent execs in multithreaded program succeeded. Signed-off-by: Denys Vlasenko Signed-off-by: Oleg Nesterov --- fs/exec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index c3d517bfdd27..b08367abf30e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1358,6 +1358,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; + pid_t old_pid; retval = security_bprm_check(bprm); if (retval) @@ -1371,6 +1372,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval) return retval; + /* Need to fetch pid before load_binary changes it */ + rcu_read_lock(); + old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); + rcu_read_unlock(); + retval = -ENOENT; for (try=0; try<2; try++) { read_lock(&binfmt_lock); @@ -1390,7 +1396,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - ptrace_event(PTRACE_EVENT_EXEC, 0); + ptrace_event(PTRACE_EVENT_EXEC, + old_pid); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) -- cgit v1.2.3 From eac1b5e57d7abc836e78fd3fbcf77dbeed01edc9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 21 Jul 2011 20:00:43 +0200 Subject: ptrace: do_wait(traced_leader_killed_by_mt_exec) can block forever Test-case: void *tfunc(void *arg) { execvp("true", NULL); return NULL; } int main(void) { int pid; if (fork()) { pthread_t t; kill(getpid(), SIGSTOP); pthread_create(&t, NULL, tfunc, NULL); for (;;) pause(); } pid = getppid(); assert(ptrace(PTRACE_ATTACH, pid, 0,0) == 0); while (wait(NULL) > 0) ptrace(PTRACE_CONT, pid, 0,0); return 0; } It is racy, exit_notify() does __wake_up_parent() too. But in the likely case it triggers the problem: de_thread() does release_task() and the old leader goes away without the notification, the tracer sleeps in do_wait() without children/tracees. Change de_thread() to do __wake_up_parent(traced_leader->parent). Since it is already EXIT_DEAD we can do this without ptrace_unlink(), EXIT_DEAD threads do not exist from do_wait's pov. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- fs/exec.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index b08367abf30e..d219541db06c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -967,6 +967,14 @@ static int de_thread(struct task_struct *tsk) BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; + + /* + * We are going to release_task()->ptrace_unlink() silently, + * the tracer can sleep in do_wait(). EXIT_DEAD guarantees + * the tracer wont't block again waiting for this thread. + */ + if (unlikely(leader->ptrace)) + __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); release_task(leader); -- cgit v1.2.3