From c91368c4889a0ee5dd06552adbb50ae54f5096fd Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Dec 2012 14:11:30 -0500 Subject: uprobes: remove redundant check We checked for uprobe==NULL earlier, no need to redo that. Signed-off-by: Sasha Levin Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1356030701-16284-22-git-send-email-sasha.levin@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/uprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index dea7acfbb071..30ea9a4f4ab4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -901,8 +901,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume } mutex_unlock(uprobes_hash(inode)); - if (uprobe) - put_uprobe(uprobe); + put_uprobe(uprobe); } static struct rb_node * -- cgit v1.2.3 From bbc33d05930f870ea049eae5ed980f8b827d0813 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 21 Nov 2012 16:55:38 +0100 Subject: uprobes: Move __set_bit(UPROBE_SKIP_SSTEP) into alloc_uprobe() Cosmetic. __set_bit(UPROBE_SKIP_SSTEP) is the part of initialization, it is not clear why it is set in insert_uprobe(). 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 30ea9a4f4ab4..afbab2cb2742 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -430,9 +430,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) u = __insert_uprobe(uprobe); spin_unlock(&uprobes_treelock); - /* For now assume that the instruction need not be single-stepped */ - __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); - return u; } @@ -454,6 +451,8 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->offset = offset; init_rwsem(&uprobe->consumer_rwsem); mutex_init(&uprobe->copy_mutex); + /* For now assume that the instruction need not be single-stepped */ + __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); -- cgit v1.2.3 From f0744af7d0fde190674064c54e2ff60b34ac71fe Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 21 Nov 2012 18:01:43 +0100 Subject: uprobes: Kill the pointless inode/uc checks in register/unregister register/unregister verifies that inode/uc != NULL. For what? This really looks like "hide the potential problem", the caller should pass the valid data. register() also checks uc->next == NULL, probably to prevent the double-register but the caller can do other stupid/wrong things. If we do this check, then we should document that uc->next should be cleared before register() and add BUG_ON(). Also add the small comment about the i_size_read() check. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index afbab2cb2742..a39d8163b713 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -844,9 +844,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * struct uprobe *uprobe; int ret; - if (!inode || !uc || uc->next) - return -EINVAL; - + /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; @@ -883,9 +881,6 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume { struct uprobe *uprobe; - if (!inode || !uc) - return; - uprobe = find_uprobe(inode, offset); if (!uprobe) return; -- cgit v1.2.3 From fe20d71f25400cccc8bffef865f79250be7dbc81 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 21 Nov 2012 17:32:30 +0100 Subject: uprobes: Kill uprobe_consumer->filter() uprobe_consumer->filter() is pointless in its current form, kill it. We will add it back, but with the different signature/semantics. Perhaps we will even re-introduce the callsite in handler_chain(), but not to just skip uc->handler(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 5 ----- kernel/events/uprobes.c | 6 ++---- kernel/trace/trace_uprobe.c | 1 - 3 files changed, 2 insertions(+), 10 deletions(-) (limited to 'kernel/events') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f628a6fc5b4..83742b91ff73 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -37,11 +37,6 @@ struct inode; struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); - /* - * filter is optional; If a filter exists, handler is run - * if and only if filter returns true. 
- */ - bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); struct uprobe_consumer *next; }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a39d8163b713..5cbebac27c01 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -477,10 +477,8 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) return; down_read(&uprobe->consumer_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { - if (!uc->filter || uc->filter(uc, current)) - uc->handler(uc, regs); - } + for (uc = uprobe->consumers; uc; uc = uc->next) + uc->handler(uc, regs); up_read(&uprobe->consumer_rwsem); } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 87b6db4ccbc5..e668024773d4 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -550,7 +550,6 @@ static int probe_event_enable(struct trace_uprobe *tu, int flag) return -EINTR; utc->cons.handler = uprobe_dispatcher; - utc->cons.filter = NULL; ret = uprobe_register(tu->inode, tu->offset, &utc->cons); if (ret) { kfree(utc); -- cgit v1.2.3 From 63633cbf82840d972248f11d2122b261d0d4779a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 22 Nov 2012 18:30:15 +0100 Subject: uprobes: Introduce filter_chain() Add the new helper filter_chain(). Currently it is only a placeholder; the comment explains what it should do. We will change it later to consult every consumer to decide whether we need to install the swbp. Until then it works as if any consumer returns true, this matches the current behavior. Change install_breakpoint() to call filter_chain() instead of checking uprobe->consumers != NULL. We obviously need this, and this equally closes the race with _unregister(). Change remove_breakpoint() to call this helper too. Currently this is pointless because remove_breakpoint() is only called when the last consumer goes away, but we will change this.
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5cbebac27c01..c38bf37d0aca 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -614,6 +614,18 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, return ret; } +static bool filter_chain(struct uprobe *uprobe) +{ + /* + * TODO: + * for_each_consumer(uc) + * if (uc->filter(...)) + * return true; + * return false; + */ + return uprobe->consumers != NULL; +} + static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) @@ -624,11 +636,10 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, /* * If probe is being deleted, unregister thread could be done with * the vma-rmap-walk through. Adding a probe now can be fatal since - * nobody will be able to cleanup. Also we could be from fork or - * mremap path, where the probe might have already been inserted. - * Hence behave as if probe already existed. + * nobody will be able to cleanup. But in this case filter_chain() + * must return false, all consumers have gone away. 
*/ - if (!uprobe->consumers) + if (!filter_chain(uprobe)) return 0; ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); @@ -655,10 +666,12 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - /* can happen if uprobe_register() fails */ if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) return 0; + if (filter_chain(uprobe)) + return 0; + set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } @@ -1382,6 +1395,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. + * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; -- cgit v1.2.3 From 04aab9b2006bbdeff78dc162f206fdfebeca97d9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Nov 2012 19:43:50 +0100 Subject: uprobes: _unregister() should always do register_for_each_vma(false) uprobe_unregister() removes the breakpoints only if the last consumer goes away. To support the filtering it should do this every time, we want to remove the breakpoints which nobody else wants to keep. Note: given that filter_chain() is not actually implemented, this patch itself doesn't change the behaviour yet, register_for_each_vma(false) is a heavy "nop" unless there are no more consumers.
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c38bf37d0aca..940199084639 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -825,12 +825,20 @@ static int __uprobe_register(struct uprobe *uprobe) return register_for_each_vma(uprobe, true); } -static void __uprobe_unregister(struct uprobe *uprobe) +static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { - if (!register_for_each_vma(uprobe, false)) - delete_uprobe(uprobe); + int err; + + if (!consumer_del(uprobe, uc)) /* WARN? */ + return; - /* TODO : cant unregister? schedule a worker thread */ + err = register_for_each_vma(uprobe, false); + if (!uprobe->consumers) { + clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); + /* TODO : cant unregister? schedule a worker thread */ + if (!err) + delete_uprobe(uprobe); + } } /* @@ -868,8 +876,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * } else if (!consumer_add(uprobe, uc)) { ret = __uprobe_register(uprobe); if (ret) { - uprobe->consumers = NULL; - __uprobe_unregister(uprobe); + __uprobe_unregister(uprobe, uc); } else { set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); } @@ -897,14 +904,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume return; mutex_lock(uprobes_hash(inode)); - - if (consumer_del(uprobe, uc)) { - if (!uprobe->consumers) { - __uprobe_unregister(uprobe); - clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - } - } - + __uprobe_unregister(uprobe, uc); mutex_unlock(uprobes_hash(inode)); put_uprobe(uprobe); } -- cgit v1.2.3 From 9a98e03cc145c994da824dac7602334f50feb670 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Nov 2012 20:15:17 +0100 Subject: uprobes: _register() should always do register_for_each_vma(true) To support 
the filtering uprobe_register() should do register_for_each_vma(true) every time the new consumer comes, we need to install the previously nacked breakpoints. Note: - uprobes_mutex[] should die, what it actually protects is alloc_uprobe(). - UPROBE_RUN_HANDLER should die too, obviously it can't work unless uprobe has a single consumer. The consumer should serialize with _register/_unregister itself. Or this flag should live in uprobe_consumer->state. - Perhaps we can do some optimizations later. For example, if filter_chain() never returns false uprobe can record this fact and avoid the unnecessary register_for_each_vma(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 940199084639..d1d1394bca8b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -482,16 +482,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) up_read(&uprobe->consumer_rwsem); } -/* Returns the previous consumer */ -static struct uprobe_consumer * -consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) +static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); uc->next = uprobe->consumers; uprobe->consumers = uc; up_write(&uprobe->consumer_rwsem); - - return uc->next; } /* @@ -820,9 +816,15 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) return err; } -static int __uprobe_register(struct uprobe *uprobe) +static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc) { - return register_for_each_vma(uprobe, true); + int err; + + consumer_add(uprobe, uc); + err = register_for_each_vma(uprobe, true); + if (!err) /* TODO: pointless unless the first consumer */ + set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); + return err; } static 
void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) @@ -867,21 +869,14 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (offset > i_size_read(inode)) return -EINVAL; - ret = 0; + ret = -ENOMEM; mutex_lock(uprobes_hash(inode)); uprobe = alloc_uprobe(inode, offset); - - if (!uprobe) { - ret = -ENOMEM; - } else if (!consumer_add(uprobe, uc)) { - ret = __uprobe_register(uprobe); - if (ret) { + if (uprobe) { + ret = __uprobe_register(uprobe, uc); + if (ret) __uprobe_unregister(uprobe, uc); - } else { - set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - } } - mutex_unlock(uprobes_hash(inode)); if (uprobe) put_uprobe(uprobe); -- cgit v1.2.3 From e591c8d78e49e6206935cf31c4d2b603bbb29166 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Nov 2012 17:29:40 +0100 Subject: uprobes: Introduce uprobe->register_rwsem Introduce uprobe->register_rwsem. It is taken for writing around __uprobe_register/unregister. Change handler_chain() to use this sem rather than consumer_rwsem. The main reason for this change is that we have the nasty problem with mmap_sem/consumer_rwsem dependency. filter_chain() needs to protect uprobe->consumers like handler_chain(), but they can not use the same lock. filter_chain() can be called under ->mmap_sem (currently this is always true), but we want to allow ->handler() to play with the probed task's memory, and this needs ->mmap_sem. Alternatively we could use srcu, but synchronize_srcu() is very slow and ->register_rwsem allows us to do more. In particular, we can teach handler_chain() to do remove_breakpoint() if this bp is "nacked" by all consumers, we know that we can't race with the new consumer which does uprobe_register(). See also the next patches. uprobes_mutex[] is almost ready to die. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d1d1394bca8b..61d0fa6b5012 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -91,6 +91,7 @@ static atomic_t uprobe_events = ATOMIC_INIT(0); struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; + struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */ struct list_head pending_list; @@ -449,6 +450,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = igrab(inode); uprobe->offset = offset; + init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); mutex_init(&uprobe->copy_mutex); /* For now assume that the instruction need not be single-stepped */ @@ -476,10 +478,10 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags)) return; - down_read(&uprobe->consumer_rwsem); + down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) uc->handler(uc, regs); - up_read(&uprobe->consumer_rwsem); + up_read(&uprobe->register_rwsem); } static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) @@ -873,9 +875,11 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * mutex_lock(uprobes_hash(inode)); uprobe = alloc_uprobe(inode, offset); if (uprobe) { + down_write(&uprobe->register_rwsem); ret = __uprobe_register(uprobe, uc); if (ret) __uprobe_unregister(uprobe, uc); + up_write(&uprobe->register_rwsem); } mutex_unlock(uprobes_hash(inode)); if (uprobe) @@ -899,7 +903,9 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume return; mutex_lock(uprobes_hash(inode)); + 
down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); + up_write(&uprobe->register_rwsem); mutex_unlock(uprobes_hash(inode)); put_uprobe(uprobe); } -- cgit v1.2.3 From 1ff6fee5e62c57d5923b805bb4206acb7953f16e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Nov 2012 18:15:46 +0100 Subject: uprobes: Change filter_chain() to iterate ->consumers list Now that it is safe to use ->consumer_rwsem under ->mmap_sem we can almost finish the implementation of filter_chain(). It still lacks the actual uc->filter(...) call but otherwise it is ready, it just pretends that ->filter() always returns true. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 61d0fa6b5012..4d0452363686 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -614,14 +614,19 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, static bool filter_chain(struct uprobe *uprobe) { - /* - * TODO: - * for_each_consumer(uc) - * if (uc->filter(...)) - * return true; - * return false; - */ - return uprobe->consumers != NULL; + struct uprobe_consumer *uc; + bool ret = false; + + down_read(&uprobe->consumer_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + /* TODO: ret = uc->filter(...) */ + ret = true; + if (ret) + break; + } + up_read(&uprobe->consumer_rwsem); + + return ret; } static int -- cgit v1.2.3 From bb929284be40cbbdb347690742557d708fd504a9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Nov 2012 18:27:08 +0100 Subject: uprobes: Kill UPROBE_RUN_HANDLER flag Simply remove UPROBE_RUN_HANDLER and the corresponding code. It can only help if uprobe has a single consumer, and in fact it is no longer needed after handler_chain() was changed to use ->register_rwsem, we simply can not race with uprobe_register().
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4d0452363686..7ec2eb278634 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -83,10 +83,8 @@ static atomic_t uprobe_events = ATOMIC_INIT(0); /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 -/* Dont run handlers when first register/ last unregister in progress*/ -#define UPROBE_RUN_HANDLER 1 /* Can skip singlestep */ -#define UPROBE_SKIP_SSTEP 2 +#define UPROBE_SKIP_SSTEP 1 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ @@ -475,9 +473,6 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; - if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags)) - return; - down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) uc->handler(uc, regs); @@ -825,13 +820,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc) { - int err; - consumer_add(uprobe, uc); - err = register_for_each_vma(uprobe, true); - if (!err) /* TODO: pointless unless the first consumer */ - set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - return err; + return register_for_each_vma(uprobe, true); } static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) @@ -842,12 +832,9 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u return; err = register_for_each_vma(uprobe, false); - if (!uprobe->consumers) { - clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - /* TODO : cant unregister? schedule a worker thread */ - if (!err) - delete_uprobe(uprobe); - } + /* TODO : cant unregister? 
schedule a worker thread */ + if (!uprobe->consumers && !err) + delete_uprobe(uprobe); } /* -- cgit v1.2.3 From d4d3ccc6d1eb74bd315d49a3829c5ad6c48d21b0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Nov 2012 18:51:34 +0100 Subject: uprobes: Kill uprobe->copy_mutex Now that ->register_rwsem is safe under ->mmap_sem we can kill ->copy_mutex and abuse down_write(&uprobe->consumer_rwsem). This makes prepare_uprobe() even more ugly, but we should kill it anyway. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7ec2eb278634..40ced98b2bc8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -91,7 +91,6 @@ struct uprobe { atomic_t ref; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; - struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */ struct list_head pending_list; struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ @@ -450,7 +449,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->offset = offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); - mutex_init(&uprobe->copy_mutex); /* For now assume that the instruction need not be single-stepped */ __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); @@ -578,7 +576,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; - mutex_lock(&uprobe->copy_mutex); + /* TODO: move this into _register, until then we abuse this sem. 
*/ + down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; @@ -602,7 +601,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: - mutex_unlock(&uprobe->copy_mutex); + up_write(&uprobe->consumer_rwsem); return ret; } -- cgit v1.2.3 From 441f1eb7db8babe2b6b4bc805f023739dbb70e33 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Nov 2012 19:54:29 +0100 Subject: uprobes: Kill uprobe_events, use RB_EMPTY_ROOT() instead uprobe_events counts the number of uprobes in uprobes_tree but it is used as a boolean. We can use RB_EMPTY_ROOT() instead. Probably no_uprobe_events() added by this patch can have more callers, say, mmf_recalc_uprobes(). Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 40ced98b2bc8..5d38b40644b8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -41,6 +41,11 @@ #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE static struct rb_root uprobes_tree = RB_ROOT; +/* + * allows us to skip the uprobe_mmap if there are no uprobe events active + * at this time. Probably a fine grained per inode count is better? + */ +#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ @@ -74,13 +79,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; static struct percpu_rw_semaphore dup_mmap_sem; -/* - * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe - * events active at this time. Probably a fine grained per inode count is - * better? 
- */ -static atomic_t uprobe_events = ATOMIC_INIT(0); - /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 /* Can skip singlestep */ @@ -460,8 +458,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) kfree(uprobe); uprobe = cur_uprobe; iput(inode); - } else { - atomic_inc(&uprobe_events); } return uprobe; @@ -685,7 +681,6 @@ static void delete_uprobe(struct uprobe *uprobe) spin_unlock(&uprobes_treelock); iput(uprobe->inode); put_uprobe(uprobe); - atomic_dec(&uprobe_events); } struct map_info { @@ -975,7 +970,7 @@ int uprobe_mmap(struct vm_area_struct *vma) struct uprobe *uprobe, *u; struct inode *inode; - if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) + if (no_uprobe_events() || !valid_vma(vma, true)) return 0; inode = vma->vm_file->f_mapping->host; @@ -1021,7 +1016,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) + if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ -- cgit v1.2.3 From 06b7bcd8cbd7eb1af331e437ec3d8f5182ae1b7e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Nov 2012 22:01:42 +0100 Subject: uprobes: Introduce uprobe_is_active() The lifetime of uprobe->rb_node and uprobe->inode is not refcounted, delete_uprobe() is called when we detect that uprobe has no consumers, and it would be deadly wrong to do this twice. Change delete_uprobe() to WARN() if it was already called. We use RB_CLEAR_NODE() to mark uprobe "inactive", then RB_EMPTY_NODE() can be used to detect this case. RB_EMPTY_NODE() is not used directly, we add the trivial helper for the next change. 
Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5d38b40644b8..358baddc8ac2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -669,6 +669,10 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad return set_orig_insn(&uprobe->arch, mm, vaddr); } +static inline bool uprobe_is_active(struct uprobe *uprobe) +{ + return !RB_EMPTY_NODE(&uprobe->rb_node); +} /* * There could be threads that have already hit the breakpoint. They * will recheck the current insn and restart if find_uprobe() fails. @@ -676,9 +680,13 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad */ static void delete_uprobe(struct uprobe *uprobe) { + if (WARN_ON(!uprobe_is_active(uprobe))) + return; + spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); spin_unlock(&uprobes_treelock); + RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ iput(uprobe->inode); put_uprobe(uprobe); } -- cgit v1.2.3 From 66d06dffa5ef6f3544997440af63a91ef36a2171 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 25 Nov 2012 22:48:37 +0100 Subject: uprobes: Kill uprobes_mutex[], separate alloc_uprobe() and __uprobe_register() uprobe_register() and uprobe_unregister() are the only users of mutex_lock(uprobes_hash(inode)), and the only reason why we can't simply remove it is that we need to ensure that delete_uprobe() is not possible after alloc_uprobe() and before consumer_add(). IOW, we need to ensure that when we take uprobe->register_rwsem this uprobe is still valid and we didn't race with _unregister() which called delete_uprobe() in between. With this patch uprobe_register() simply checks uprobe_is_active() and retries if it hits this very unlikely race. uprobes_mutex[] is no longer needed and can be removed. 
There is another reason for this change, prepare_uprobe() should be folded into alloc_uprobe() and we do not want to hold the extra locks around read_mapping_page/etc. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 51 +++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 358baddc8ac2..c3b65d1c8443 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -50,29 +50,6 @@ static struct rb_root uprobes_tree = RB_ROOT; static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ #define UPROBES_HASH_SZ 13 - -/* - * We need separate register/unregister and mmap/munmap lock hashes because - * of mmap_sem nesting. - * - * uprobe_register() needs to install probes on (potentially) all processes - * and thus needs to acquire multiple mmap_sems (consequtively, not - * concurrently), whereas uprobe_mmap() is called while holding mmap_sem - * for the particular process doing the mmap. - * - * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem - * because of lock order against i_mmap_mutex. This means there's a hole in - * the register vma iteration where a mmap() can happen. - * - * Thus uprobe_register() can race with uprobe_mmap() and we can try and - * install a probe where one is already installed. 
- */ - -/* serialize (un)register */ -static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; - -#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) - /* serialize uprobe->pending_list */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) @@ -865,20 +842,26 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (offset > i_size_read(inode)) return -EINVAL; - ret = -ENOMEM; - mutex_lock(uprobes_hash(inode)); + retry: uprobe = alloc_uprobe(inode, offset); - if (uprobe) { - down_write(&uprobe->register_rwsem); + if (!uprobe) + return -ENOMEM; + /* + * We can race with uprobe_unregister()->delete_uprobe(). + * Check uprobe_is_active() and retry if it is false. + */ + down_write(&uprobe->register_rwsem); + ret = -EAGAIN; + if (likely(uprobe_is_active(uprobe))) { ret = __uprobe_register(uprobe, uc); if (ret) __uprobe_unregister(uprobe, uc); - up_write(&uprobe->register_rwsem); } - mutex_unlock(uprobes_hash(inode)); - if (uprobe) - put_uprobe(uprobe); + up_write(&uprobe->register_rwsem); + put_uprobe(uprobe); + if (unlikely(ret == -EAGAIN)) + goto retry; return ret; } @@ -896,11 +879,9 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume if (!uprobe) return; - mutex_lock(uprobes_hash(inode)); down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); up_write(&uprobe->register_rwsem); - mutex_unlock(uprobes_hash(inode)); put_uprobe(uprobe); } @@ -1609,10 +1590,8 @@ static int __init init_uprobes(void) { int i; - for (i = 0; i < UPROBES_HASH_SZ; i++) { - mutex_init(&uprobes_mutex[i]); + for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); - } if (percpu_init_rwsem(&dup_mmap_sem)) return -ENOMEM; -- cgit v1.2.3 From 806a98bdf2a862fef0fc880399d677b35ba525ff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 27 Dec 2012 18:21:11 +0100 Subject: uprobes: 
Rationalize the usage of filter_chain() filter_chain() was added into install_breakpoint/remove_breakpoint to simplify the initial changes but this is sub-optimal. This patch shifts the callsite to the callers, register_for_each_vma() and uprobe_mmap(). This way: - It will be easier to add the new arguments. This is the main reason, we can do more optimizations later. - register_for_each_vma(is_register => true) can be optimized, we only need to consult the new consumer. The previous consumers were already asked when they called uprobe_register(). This patch also moves the MMF_HAS_UPROBES check from remove_breakpoint(), this allows us to avoid the potentially costly filter_chain(). Note that register_for_each_vma(is_register => false) doesn't really need to take ->consumer_rwsem, but I don't think it makes sense to optimize this and introduce filter_chain_lockless(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c3b65d1c8443..33912086d54e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -579,6 +579,11 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, return ret; } +static inline bool consumer_filter(struct uprobe_consumer *uc) +{ + return true; /* TODO: !uc->filter || uc->filter(...) */ +} + static bool filter_chain(struct uprobe *uprobe) { struct uprobe_consumer *uc; @@ -586,8 +591,7 @@ static bool filter_chain(struct uprobe *uprobe) down_read(&uprobe->consumer_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { - /* TODO: ret = uc->filter(...) 
*/ - ret = true; + ret = consumer_filter(uc); if (ret) break; } @@ -603,15 +607,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, bool first_uprobe; int ret; - /* - * If probe is being deleted, unregister thread could be done with - * the vma-rmap-walk through. Adding a probe now can be fatal since - * nobody will be able to cleanup. But in this case filter_chain() - * must return false, all consumers have gone away. - */ - if (!filter_chain(uprobe)) - return 0; - ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; @@ -636,12 +631,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) - return 0; - - if (filter_chain(uprobe)) - return 0; - set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } @@ -781,10 +770,14 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; - if (is_register) - err = install_breakpoint(uprobe, mm, vma, info->vaddr); - else - err |= remove_breakpoint(uprobe, mm, info->vaddr); + if (is_register) { + /* consult only the "caller", new consumer. */ + if (consumer_filter(uprobe->consumers)) + err = install_breakpoint(uprobe, mm, vma, info->vaddr); + } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { + if (!filter_chain(uprobe)) + err |= remove_breakpoint(uprobe, mm, info->vaddr); + } unlock: up_write(&mm->mmap_sem); @@ -968,9 +961,14 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); - + /* + * We can race with uprobe_unregister(), this uprobe can be already + * removed. But in this case filter_chain() must return false, all + * consumers have gone away. 
+ */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - if (!fatal_signal_pending(current)) { + if (!fatal_signal_pending(current) && + filter_chain(uprobe)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } -- cgit v1.2.3 From 8a7f2fa0dea3b019500961b86d765e6fdd4bffb2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 28 Dec 2012 17:58:38 +0100 Subject: uprobes: Reintroduce uprobe_consumer->filter() Finally add uprobe_consumer->filter() and change consumer_filter() to actually call this method. Note that ->filter() accepts mm_struct, not task_struct. Because: 1. We do not have for_each_mm_user(mm, task). 2. Even if we implement for_each_mm_user(), ->filter() can use it itself. 3. It is not clear who will actually need this interface to do the "nontrivial" filtering. Another argument is "enum uprobe_filter_ctx", consumer->filter() can use it to figure out why/where it was called. For example, perhaps we can add UPROBE_FILTER_PRE_REGISTER used by build_map_info() to quickly "nack" the unwanted mm's. In this case consumer should know that it is called under ->i_mmap_mutex. See the previous discussion at http://marc.info/?t=135214229700002 Perhaps we should pass more arguments, vma/vaddr? Note: this patch obviously can't help to filter out the child created by fork(), this will be addressed later. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 9 +++++++++ kernel/events/uprobes.c | 18 +++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel/events') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 83742b91ff73..c2df6934fdc6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,8 +35,17 @@ struct inode; # include #endif +enum uprobe_filter_ctx { + UPROBE_FILTER_REGISTER, + UPROBE_FILTER_UNREGISTER, + UPROBE_FILTER_MMAP, +}; + struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + bool (*filter)(struct uprobe_consumer *self, + enum uprobe_filter_ctx ctx, + struct mm_struct *mm); struct uprobe_consumer *next; }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 33912086d54e..c2737be3c4b8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -579,19 +579,21 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, return ret; } -static inline bool consumer_filter(struct uprobe_consumer *uc) +static inline bool consumer_filter(struct uprobe_consumer *uc, + enum uprobe_filter_ctx ctx, struct mm_struct *mm) { - return true; /* TODO: !uc->filter || uc->filter(...) */ + return !uc->filter || uc->filter(uc, ctx, mm); } -static bool filter_chain(struct uprobe *uprobe) +static bool filter_chain(struct uprobe *uprobe, + enum uprobe_filter_ctx ctx, struct mm_struct *mm) { struct uprobe_consumer *uc; bool ret = false; down_read(&uprobe->consumer_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { - ret = consumer_filter(uc); + ret = consumer_filter(uc, ctx, mm); if (ret) break; } @@ -772,10 +774,12 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) if (is_register) { /* consult only the "caller", new consumer. 
*/ - if (consumer_filter(uprobe->consumers)) + if (consumer_filter(uprobe->consumers, + UPROBE_FILTER_REGISTER, mm)) err = install_breakpoint(uprobe, mm, vma, info->vaddr); } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { - if (!filter_chain(uprobe)) + if (!filter_chain(uprobe, + UPROBE_FILTER_UNREGISTER, mm)) err |= remove_breakpoint(uprobe, mm, info->vaddr); } @@ -968,7 +972,7 @@ int uprobe_mmap(struct vm_area_struct *vma) */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!fatal_signal_pending(current) && - filter_chain(uprobe)) { + filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } -- cgit v1.2.3 From da1816b1caeccdff04531e763bb35d7caa3ed19f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 29 Dec 2012 17:49:11 +0100 Subject: uprobes: Teach handler_chain() to filter out the probed task Currently there are 2 problems with pre-filtering: 1. It is not possible to add/remove a task (mm) after uprobe_register() 2. A forked child inherits all breakpoints and uprobe_consumer can not control this. This patch does the first step to improve the filtering. handler_chain() removes the breakpoints installed by this uprobe from current->mm if all handlers return UPROBE_HANDLER_REMOVE. Note that handler_chain() relies on ->register_rwsem to avoid the race with uprobe_register/unregister which can add/del a consumer, or even remove and then insert the new uprobe at the same address. Perhaps we will add uprobe_apply_mm(uprobe, mm, is_register) and teach copy_mm() to do filter(UPROBE_FILTER_FORK), but I think this change makes sense anyway. Note: instead of checking the retcode from uc->handler, we could add uc->filter(UPROBE_FILTER_BPHIT). But I think it is not optimal to call 2 hooks in a row. This buys nothing, and if handler/filter do something nontrivial they will probably do the same work twice. 
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 3 +++ kernel/events/uprobes.c | 58 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 10 deletions(-) (limited to 'kernel/events') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index c2df6934fdc6..95d0002efda5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,6 +35,9 @@ struct inode; # include #endif +#define UPROBE_HANDLER_REMOVE 1 +#define UPROBE_HANDLER_MASK 1 + enum uprobe_filter_ctx { UPROBE_FILTER_REGISTER, UPROBE_FILTER_UNREGISTER, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c2737be3c4b8..04c104ad9522 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -440,16 +440,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) return uprobe; } -static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) -{ - struct uprobe_consumer *uc; - - down_read(&uprobe->register_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) - uc->handler(uc, regs); - up_read(&uprobe->register_rwsem); -} - static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); @@ -882,6 +872,33 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume put_uprobe(uprobe); } +static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int err = 0; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long vaddr; + loff_t offset; + + if (!valid_vma(vma, false) || + vma->vm_file->f_mapping->host != uprobe->inode) + continue; + + offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; + if (uprobe->offset < offset || + uprobe->offset >= offset + vma->vm_end - vma->vm_start) + continue; + + vaddr = offset_to_vaddr(vma, uprobe->offset); + err |= remove_breakpoint(uprobe, mm, vaddr); + } + up_read(&mm->mmap_sem); + + 
return err; +} + static struct rb_node * find_node_in_range(struct inode *inode, loff_t min, loff_t max) { @@ -1435,6 +1452,27 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) return uprobe; } +static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) +{ + struct uprobe_consumer *uc; + int remove = UPROBE_HANDLER_REMOVE; + + down_read(&uprobe->register_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + int rc = uc->handler(uc, regs); + + WARN(rc & ~UPROBE_HANDLER_MASK, + "bad rc=0x%x from %pf()\n", rc, uc->handler); + remove &= rc; + } + + if (remove && uprobe->consumers) { + WARN_ON(!uprobe_is_active(uprobe)); + unapply_uprobe(uprobe, current->mm); + } + up_read(&uprobe->register_rwsem); +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. -- cgit v1.2.3 From 74e59dfc6b19e3472a7c16ad57bc831e6e647895 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Dec 2012 15:54:08 +0100 Subject: uprobes: Change handle_swbp() to expose bp_vaddr to handler_chain() Change handle_swbp() to set regs->ip = bp_vaddr in advance, this is what consumer->handler() needs but uprobe_get_swbp_addr() is not exported. This also simplifies the code and makes it more consistent across the supported architectures. handle_swbp() becomes the only caller of uprobe_get_swbp_addr(). 
Signed-off-by: Oleg Nesterov Acked-by: Ananth N Mavinakayanahalli --- arch/x86/kernel/uprobes.c | 1 - kernel/events/uprobes.c | 15 +++++++-------- kernel/trace/trace_uprobe.c | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel/events') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 4e33a35d659e..0ba4cfb4f412 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -681,7 +681,6 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) continue; if (auprobe->insn[i] == 0x90) { - regs->ip = uprobe_get_swbp_addr(regs); regs->ip += i + 1; return true; } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 04c104ad9522..f1b807831fc2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1504,6 +1504,10 @@ static void handle_swbp(struct pt_regs *regs) } return; } + + /* change it in advance for ->handler() and restart */ + instruction_pointer_set(regs, bp_vaddr); + /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the @@ -1511,14 +1515,14 @@ static void handle_swbp(struct pt_regs *regs) */ smp_rmb(); /* pairs with wmb() in install_breakpoint() */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) - goto restart; + goto out; utask = current->utask; if (!utask) { utask = add_utask(); /* Cannot allocate; re-execute the instruction. */ if (!utask) - goto restart; + goto out; } handler_chain(uprobe, regs); @@ -1531,12 +1535,7 @@ static void handle_swbp(struct pt_regs *regs) return; } -restart: - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. 
- */ - instruction_pointer_set(regs, bp_vaddr); + /* can_skip_sstep() succeeded, or restart if can't singlestep */ out: put_uprobe(uprobe); } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e668024773d4..17d9b2bcc28d 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -492,7 +492,7 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) return; entry = ring_buffer_event_data(event); - entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + entry->ip = instruction_pointer(task_pt_regs(current)); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -667,7 +667,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) if (!entry) goto out; - entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + entry->ip = instruction_pointer(task_pt_regs(current)); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); -- cgit v1.2.3 From c8a82538001e1a68f4a319d5a75de90d1f284731 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Dec 2012 17:40:39 +0100 Subject: uprobes: Move alloc_page() from xol_add_vma() to xol_alloc_area() Move alloc_page() from xol_add_vma() to xol_alloc_area() to cleanup the code. This separates the memory allocations and consolidates the -EALREADY cleanups and the error handling. 
Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f1b807831fc2..ea2e2a85479a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1041,22 +1041,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon /* Slot allocation for XOL */ static int xol_add_vma(struct xol_area *area) { - struct mm_struct *mm; - int ret; - - area->page = alloc_page(GFP_HIGHUSER); - if (!area->page) - return -ENOMEM; - - ret = -EALREADY; - mm = current->mm; + struct mm_struct *mm = current->mm; + int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; ret = -ENOMEM; - /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1072,11 +1064,8 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; ret = 0; - -fail: + fail: up_write(&mm->mmap_sem); - if (ret) - __free_page(area->page); return ret; } @@ -1104,21 +1093,26 @@ static struct xol_area *xol_alloc_area(void) area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) - return NULL; + goto out; area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); - if (!area->bitmap) - goto fail; + goto free_area; + + area->page = alloc_page(GFP_HIGHUSER); + if (!area->page) + goto free_bitmap; init_waitqueue_head(&area->wq); if (!xol_add_vma(area)) return area; -fail: + __free_page(area->page); + free_bitmap: kfree(area->bitmap); + free_area: kfree(area); - + out: return get_xol_area(current->mm); } -- cgit v1.2.3 From 9b545df809644912552360054c7bbe8b8a9e01fa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 
31 Dec 2012 16:39:49 +0100 Subject: uprobes: Fold xol_alloc_area() into get_xol_area() Currently only xol_get_insn_slot() does get_xol_area() + xol_alloc_area(), but this will have more users and we do not want to copy-and-paste this code. This patch simply moves xol_alloc_area() into get_xol_area() to simplify the current and future code. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ea2e2a85479a..7e3e5c5b0d88 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1070,27 +1070,21 @@ static int xol_add_vma(struct xol_area *area) return ret; } -static struct xol_area *get_xol_area(struct mm_struct *mm) -{ - struct xol_area *area; - - area = mm->uprobes_state.xol_area; - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ - - return area; -} - /* - * xol_alloc_area - Allocate process's xol_area. - * This area will be used for storing instructions for execution out of - * line. + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. * * Returns the allocated area or NULL. 
*/ -static struct xol_area *xol_alloc_area(void) +static struct xol_area *get_xol_area(void) { + struct mm_struct *mm = current->mm; struct xol_area *area; + area = mm->uprobes_state.xol_area; + if (area) + goto ret; + area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1113,7 +1107,10 @@ static struct xol_area *xol_alloc_area(void) free_area: kfree(area); out: - return get_xol_area(current->mm); + area = mm->uprobes_state.xol_area; + ret: + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + return area; } /* @@ -1189,14 +1186,11 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot unsigned long offset; void *vaddr; - area = get_xol_area(current->mm); - if (!area) { - area = xol_alloc_area(); - if (!area) - return 0; - } - current->utask->xol_vaddr = xol_take_insn_slot(area); + area = get_xol_area(); + if (!area) + return 0; + current->utask->xol_vaddr = xol_take_insn_slot(area); /* * Initialize the slot if xol_vaddr points to valid * instruction slot. -- cgit v1.2.3 From 5a2df662aafdabffb2cf3adb780a5adf66dfb3bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Dec 2012 17:03:32 +0100 Subject: uprobes: Turn add_utask() into get_utask() Rename add_utask() into get_utask() and change it to allocate on demand to simplify the caller. Like get_xol_area() it will have more users. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7e3e5c5b0d88..16e54d63a9fd 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1290,23 +1290,18 @@ void uprobe_copy_process(struct task_struct *t) } /* - * Allocate a uprobe_task object for the task. - * Called when the thread hits a breakpoint for the first time. 
+ * Allocate a uprobe_task object for the task if if necessary. + * Called when the thread hits a breakpoint. * * Returns: * - pointer to new uprobe_task on success * - NULL otherwise */ -static struct uprobe_task *add_utask(void) +static struct uprobe_task *get_utask(void) { - struct uprobe_task *utask; - - utask = kzalloc(sizeof *utask, GFP_KERNEL); - if (unlikely(!utask)) - return NULL; - - current->utask = utask; - return utask; + if (!current->utask) + current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + return current->utask; } /* Prepare to single-step probed instruction out of line. */ @@ -1505,13 +1500,9 @@ static void handle_swbp(struct pt_regs *regs) if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; - utask = current->utask; - if (!utask) { - utask = add_utask(); - /* Cannot allocate; re-execute the instruction. */ - if (!utask) - goto out; - } + utask = get_utask(); + if (!utask) + goto out; /* re-execute the instruction. */ handler_chain(uprobe, regs); if (can_skip_sstep(uprobe, regs)) -- cgit v1.2.3 From a6cb3f6d51253e9cf21a38b17c025018117809d7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Dec 2012 18:00:06 +0100 Subject: uprobes: Do not play with utask in xol_get_insn_slot() pre_ssout()->xol_get_insn_slot() path is confusing and buggy. This patch cleanups the code, the next one fixes the bug. Change xol_get_insn_slot() to only allocate the slot and do nothing more, move the initialization of utask->xol_vaddr/vaddr into pre_ssout(). 
Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 16e54d63a9fd..8d9c5bcb110e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1176,30 +1176,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area) } /* - * xol_get_insn_slot - If was not allocated a slot, then - * allocate a slot. + * xol_get_insn_slot - allocate a slot for xol. * Returns the allocated slot address or 0. */ -static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) +static unsigned long xol_get_insn_slot(struct uprobe *uprobe) { struct xol_area *area; unsigned long offset; + unsigned long xol_vaddr; void *vaddr; area = get_xol_area(); if (!area) return 0; - current->utask->xol_vaddr = xol_take_insn_slot(area); - /* - * Initialize the slot if xol_vaddr points to valid - * instruction slot. - */ - if (unlikely(!current->utask->xol_vaddr)) + xol_vaddr = xol_take_insn_slot(area); + if (unlikely(!xol_vaddr)) return 0; - current->utask->vaddr = slot_addr; - offset = current->utask->xol_vaddr & ~PAGE_MASK; + /* Initialize the slot */ + offset = xol_vaddr & ~PAGE_MASK; vaddr = kmap_atomic(area->page); memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); kunmap_atomic(vaddr); @@ -1209,7 +1205,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot */ flush_dcache_page(area->page); - return current->utask->xol_vaddr; + return xol_vaddr; } /* @@ -1306,12 +1302,21 @@ static struct uprobe_task *get_utask(void) /* Prepare to single-step probed instruction out of line. 
*/ static int -pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) +pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { - if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) - return 0; + struct uprobe_task *utask; + unsigned long xol_vaddr; + + utask = current->utask; + + xol_vaddr = xol_get_insn_slot(uprobe); + if (!xol_vaddr) + return -ENOMEM; + + utask->xol_vaddr = xol_vaddr; + utask->vaddr = bp_vaddr; - return -EFAULT; + return arch_uprobe_pre_xol(&uprobe->arch, regs); } /* -- cgit v1.2.3 From aba51024e7159c93914557caaa2b8cda26331091 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Dec 2012 18:12:48 +0100 Subject: uprobes: Fix utask->xol_vaddr leak in pre_ssout() pre_ssout() should do xol_free_insn_slot() if arch_uprobe_pre_xol() fails, otherwise nobody will free the allocated slot. Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8d9c5bcb110e..0527379dac5b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1306,6 +1306,7 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe_task *utask; unsigned long xol_vaddr; + int err; utask = current->utask; @@ -1316,7 +1317,13 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) utask->xol_vaddr = xol_vaddr; utask->vaddr = bp_vaddr; - return arch_uprobe_pre_xol(&uprobe->arch, regs); + err = arch_uprobe_pre_xol(&uprobe->arch, regs); + if (unlikely(err)) { + xol_free_insn_slot(current); + return err; + } + + return 0; } /* -- cgit v1.2.3 From 608e7427c0a06de0d70374a9fd7defc8eb228b7e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Dec 2012 18:20:42 +0100 Subject: uprobes: Do not allocate current->utask unnecessary 
handle_swbp() does get_utask() before can_skip_sstep() for no reason, we do not need ->utask if can_skip_sstep() succeeds. Move get_utask() to pre_ssout() who actually starts to use it. Move the initialization of utask->active_uprobe/state as well. This way the whole initialization is consolidated in pre_ssout(). Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov --- kernel/events/uprobes.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0527379dac5b..071edcb3e62d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1308,7 +1308,9 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) unsigned long xol_vaddr; int err; - utask = current->utask; + utask = get_utask(); + if (!utask) + return -ENOMEM; xol_vaddr = xol_get_insn_slot(uprobe); if (!xol_vaddr) @@ -1323,6 +1325,8 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) return err; } + utask->active_uprobe = uprobe; + utask->state = UTASK_SSTEP; return 0; } @@ -1474,7 +1478,6 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) */ static void handle_swbp(struct pt_regs *regs) { - struct uprobe_task *utask; struct uprobe *uprobe; unsigned long bp_vaddr; int uninitialized_var(is_swbp); @@ -1512,19 +1515,12 @@ static void handle_swbp(struct pt_regs *regs) if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; - utask = get_utask(); - if (!utask) - goto out; /* re-execute the instruction. 
*/ - handler_chain(uprobe, regs); if (can_skip_sstep(uprobe, regs)) goto out; - if (!pre_ssout(uprobe, regs, bp_vaddr)) { - utask->active_uprobe = uprobe; - utask->state = UTASK_SSTEP; + if (!pre_ssout(uprobe, regs, bp_vaddr)) return; - } /* can_skip_sstep() succeeded, or restart if can't singlestep */ out: -- cgit v1.2.3 From af4355e91f15812df8608925738c91be57c580dd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Dec 2012 18:37:11 +0100 Subject: uprobes: Kill the bogus IS_ERR_VALUE(xol_vaddr) check utask->xol_vaddr is either zero or valid, remove the bogus IS_ERR_VALUE() check in xol_free_insn_slot(). Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 071edcb3e62d..f6c7062fb950 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1223,8 +1223,7 @@ static void xol_free_insn_slot(struct task_struct *tsk) return; slot_addr = tsk->utask->xol_vaddr; - - if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr))) + if (unlikely(!slot_addr)) return; area = tsk->mm->uprobes_state.xol_area; -- cgit v1.2.3 From e8440c1458ba571bc3fac8a6beb53ff604199f5b Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sun, 13 Jan 2013 19:03:34 +0100 Subject: uprobes: Add exports for module use The original pull message for uprobes (commit 654443e2) noted: This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. In order to actually be usable in module-based tools like SystemTap, the interface needs to be exported. This patch first adds the obvious exports for uprobe_register and uprobe_unregister. Then it also adds one for task_user_regset_view, which is necessary to get the correct state of userspace registers. 
Signed-off-by: Josh Stone Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 3 +++ kernel/ptrace.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f6c7062fb950..221fc58f59e3 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -27,6 +27,7 @@ #include /* read_mapping_page */ #include #include +#include #include /* anon_vma_prepare */ #include /* set_pte_at_notify */ #include /* try_to_free_swap */ @@ -851,6 +852,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * goto retry; return ret; } +EXPORT_SYMBOL_GPL(uprobe_register); /* * uprobe_unregister - unregister a already registered probe. @@ -871,6 +873,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume up_write(&uprobe->register_rwsem); put_uprobe(uprobe); } +EXPORT_SYMBOL_GPL(uprobe_unregister); static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6cbeaae4406d..acbd28424d81 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, kiov->iov_len, kiov->iov_base); } +/* + * This is declared in linux/regset.h and defined in machine-dependent + * code. We put the export here, near the primary machine-neutral use, + * to ensure no machine forgets it. 
+ */ +EXPORT_SYMBOL_GPL(task_user_regset_view); #endif int ptrace_request(struct task_struct *child, long request, -- cgit v1.2.3 From f22c1bb6b4706be3502b378cb14564449b15f983 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 2 Feb 2013 16:27:52 +0100 Subject: perf: Introduce hw_perf_event->tp_target and ->tp_list sys_perf_event_open()->perf_init_event(event) is called before find_get_context(event), this means that event->ctx == NULL when class->reg(TRACE_REG_PERF_REGISTER/OPEN) is called and thus it can't know if this event is per-task or system-wide. This patch adds hw_perf_event->tp_target for PERF_TYPE_TRACEPOINT, this is analogous to PERF_TYPE_BREAKPOINT/bp_target we already have. The patch also moves ->bp_target up so that it can overlap with the new member, this can help the compiler to generate the better code. trace_uprobe_register() will use it for prefiltering to avoid the unnecessary breakpoints in mm's we do not want to trace. ->tp_target doesn't have its own reference, but we can rely on the fact that either sys_perf_event_open() holds a reference, or it is equal to event->ctx->task. So this pointer is always valid until free_event(). Also add the "struct list_head tp_list" into this union. It is not strictly necessary, but it can simplify the next changes and we can add it for free. 
Signed-off-by: Oleg Nesterov --- include/linux/perf_event.h | 9 +++++++-- kernel/events/core.c | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/events') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 42adf012145d..e47ee462c2f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -135,16 +135,21 @@ struct hw_perf_event { struct { /* software */ struct hrtimer hrtimer; }; + struct { /* tracepoint */ + struct task_struct *tp_target; + /* for tp_event->class */ + struct list_head tp_list; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ - struct arch_hw_breakpoint info; - struct list_head bp_list; /* * Crufty hack to avoid the chicken and egg * problem hw_breakpoint has with context * creation and event initalization. */ struct task_struct *bp_target; + struct arch_hw_breakpoint info; + struct list_head bp_list; }; #endif }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 301079d06f24..e2d4323c6ae6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6162,11 +6162,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (task) { event->attach_state = PERF_ATTACH_TASK; + + if (attr->type == PERF_TYPE_TRACEPOINT) + event->hw.tp_target = task; #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * hw_breakpoint is a bit difficult here.. */ - if (attr->type == PERF_TYPE_BREAKPOINT) + else if (attr->type == PERF_TYPE_BREAKPOINT) event->hw.bp_target = task; #endif } -- cgit v1.2.3 From bdf8647c44766590ed02f9a84a450a796558b753 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 3 Feb 2013 19:21:12 +0100 Subject: uprobes: Introduce uprobe_apply() Currently it is not possible to change the filtering constraints after uprobe_register(), so a consumer can not, say, start to trace a task/mm which was previously filtered out, or remove the no longer needed bp's. 
Introduce uprobe_apply() which simply does register_for_each_vma() again to consult uprobe_consumer->filter() and install/remove the breakpoints. The only complication is that register_for_each_vma() can no longer assume that uprobe->consumers should be consulted if is_register == T, so we change it to accept "struct uprobe_consumer *new" instead. Unlike uprobe_register(), uprobe_apply(true) doesn't do "unregister" if register_for_each_vma() fails, it is up to caller to handle the error. Note: we probably need to clean up the current interface, it is strange that uprobe_apply/unregister need inode/offset. We should either change uprobe_register() to return "struct uprobe *", or add a private ->uprobe member in uprobe_consumer. And in the long term uprobe_apply() should take a single argument, uprobe or consumer, even "bool add" should go away. Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 6 ++++++ kernel/events/uprobes.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'kernel/events') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 95d0002efda5..02b83db8e2c5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -101,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -124,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, 
struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int +uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +{ + return -ENOSYS; +} static inline void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 221fc58f59e3..a567c8c7ef31 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -733,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) return curr; } -static int register_for_each_vma(struct uprobe *uprobe, bool is_register) +static int +register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) { + bool is_register = !!new; struct map_info *info; int err = 0; @@ -765,7 +767,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) if (is_register) { /* consult only the "caller", new consumer. */ - if (consumer_filter(uprobe->consumers, + if (consumer_filter(new, UPROBE_FILTER_REGISTER, mm)) err = install_breakpoint(uprobe, mm, vma, info->vaddr); } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { @@ -788,7 +790,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc) { consumer_add(uprobe, uc); - return register_for_each_vma(uprobe, true); + return register_for_each_vma(uprobe, uc); } static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) @@ -798,7 +800,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u if (!consumer_del(uprobe, uc)) /* WARN? */ return; - err = register_for_each_vma(uprobe, false); + err = register_for_each_vma(uprobe, NULL); /* TODO : cant unregister? 
schedule a worker thread */ if (!uprobe->consumers && !err) delete_uprobe(uprobe); @@ -854,6 +856,35 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * } EXPORT_SYMBOL_GPL(uprobe_register); +/* + * uprobe_apply - unregister a already registered probe. + * @inode: the file in which the probe has to be removed. + * @offset: offset from the start of the file. + * @uc: consumer which wants to add more or remove some breakpoints + * @add: add or remove the breakpoints + */ +int uprobe_apply(struct inode *inode, loff_t offset, + struct uprobe_consumer *uc, bool add) +{ + struct uprobe *uprobe; + struct uprobe_consumer *con; + int ret = -ENOENT; + + uprobe = find_uprobe(inode, offset); + if (!uprobe) + return ret; + + down_write(&uprobe->register_rwsem); + for (con = uprobe->consumers; con && con != uc ; con = con->next) + ; + if (con) + ret = register_for_each_vma(uprobe, add ? uc : NULL); + up_write(&uprobe->register_rwsem); + put_uprobe(uprobe); + + return ret; +} + /* * uprobe_unregister - unregister a already registered probe. * @inode: the file in which the probe has to be removed. -- cgit v1.2.3 From 02e176af92f3e2e9ec3a48792036566af2dcd534 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 6 Feb 2013 23:29:20 +0200 Subject: perf/hwbp: Fix cleanup in case of kzalloc failure Obviously this is a typo and could result in memory leaks if kzalloc fails on a given cpu. 
Signed-off-by: Daniel Baluta Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360186160-7566-1-git-send-email-dbaluta@ixiacom.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index fe8a916507ed..a64f8aeb5c1f 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void) err_alloc: for_each_possible_cpu(err_cpu) { for (i = 0; i < TYPE_MAX; i++) - kfree(per_cpu(nr_task_bp_pinned[i], cpu)); + kfree(per_cpu(nr_task_bp_pinned[i], err_cpu)); if (err_cpu == cpu) break; } -- cgit v1.2.3