From 682968e0c425c60f0dde37977e5beb2b12ddc4cc Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 30 Mar 2012 23:56:46 +0530 Subject: [PATCH] uprobes/core: Optimize probe hits with the help of a counter Maintain a per-mm counter: number of uprobes that are inserted on this process address space. This counter can be used at probe hit time to determine if we need a lookup in the uprobes rbtree. Everytime a probe gets inserted successfully, the probe count is incremented and everytime a probe gets removed, the probe count is decremented. The new uprobe_munmap hook ensures the count is correct on a unmap or remap of a region. We expect that once a uprobe_munmap() is called, the vma goes away. So uprobe_unregister() finding a probe to unregister would either mean unmap event hasnt occurred yet or a mmap event on the same executable file occured after a unmap event. Additionally, uprobe_mmap hook now also gets called: a. on every executable vma that is COWed at fork. b. a vma of interest is newly mapped; breakpoint insertion also happens at the required address. On process creation, make sure the probes count in the child is set correctly. Special cases that are taken care include: a. mremap b. VM_DONTCOPY vmas on fork() c. insertion/removal races in the parent during fork(). Signed-off-by: Srikar Dronamraju Cc: Linus Torvalds Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Linux-mm Cc: Oleg Nesterov Cc: Andi Kleen Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120330182646.10018.85805.sendpatchset@srdronam.in.ibm.com Signed-off-by: Ingo Molnar --- include/linux/uprobes.h | 5 ++ kernel/events/uprobes.c | 119 +++++++++++++++++++++++++++++++++++++--- kernel/fork.c | 3 + mm/mmap.c | 10 +++- 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index a111460c07d5..d594d3b3ad4c 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -99,6 +99,7 @@ struct xol_area { struct uprobes_state { struct xol_area *xol_area; + atomic_t count; }; extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); @@ -106,6 +107,7 @@ extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); +extern void uprobe_munmap(struct vm_area_struct *vma); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); @@ -132,6 +134,9 @@ static inline int uprobe_mmap(struct vm_area_struct *vma) { return 0; } +static inline void uprobe_munmap(struct vm_area_struct *vma) +{ +} static inline void uprobe_notify_resume(struct pt_regs *regs) { } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b395edb97f53..29e881b0137d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -642,6 +642,29 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); } +/* + * How mm->uprobes_state.count gets updated + * uprobe_mmap() increments the count if + * - it successfully adds a breakpoint. + * - it cannot add a breakpoint, but sees that there is a underlying + * breakpoint (via a is_swbp_at_addr()). + * + * uprobe_munmap() decrements the count if + * - it sees a underlying breakpoint, (via is_swbp_at_addr) + * (Subsequent uprobe_unregister wouldnt find the breakpoint + * unless a uprobe_mmap kicks in, since the old vma would be + * dropped just after uprobe_munmap.) + * + * uprobe_register increments the count if: + * - it successfully adds a breakpoint. + * + * uprobe_unregister decrements the count if: + * - it sees a underlying breakpoint and removes successfully. + * (via is_swbp_at_addr) + * (Subsequent uprobe_munmap wouldnt find the breakpoint + * since there is no underlying breakpoint after the + * breakpoint removal.) + */ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, loff_t vaddr) @@ -675,7 +698,19 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, uprobe->flags |= UPROBE_COPY_INSN; } + + /* + * Ideally, should be updating the probe count after the breakpoint + * has been successfully inserted. However a thread could hit the + * breakpoint we just inserted even before the probe count is + * incremented. If this is the first breakpoint placed, breakpoint + * notifier might ignore uprobes and pass the trap to the thread. + * Hence increment before and decrement on failure. + */ + atomic_inc(&mm->uprobes_state.count); ret = set_swbp(&uprobe->arch, mm, addr); + if (ret) + atomic_dec(&mm->uprobes_state.count); return ret; } @@ -683,7 +718,8 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) { - set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true); + if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) + atomic_dec(&mm->uprobes_state.count); } /* @@ -1009,7 +1045,7 @@ int uprobe_mmap(struct vm_area_struct *vma) struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; - int ret; + int ret, count; if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) return 0; @@ -1023,6 +1059,7 @@ int uprobe_mmap(struct vm_area_struct *vma) build_probe_list(inode, &tmp_list); ret = 0; + count = 0; list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { loff_t vaddr; @@ -1030,21 +1067,85 @@ int uprobe_mmap(struct vm_area_struct *vma) list_del(&uprobe->pending_list); if (!ret) { vaddr = vma_address(vma, uprobe->offset); - if (vaddr >= vma->vm_start && vaddr < vma->vm_end) { - ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - /* Ignore double add: */ - if (ret == -EEXIST) - ret = 0; + + if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { + put_uprobe(uprobe); + continue; } + + ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); + + /* Ignore double add: */ + if (ret == -EEXIST) { + ret = 0; + + if (!is_swbp_at_addr(vma->vm_mm, vaddr)) + continue; + + /* + * Unable to insert a breakpoint, but + * breakpoint lies underneath. Increment the + * probe count. + */ + atomic_inc(&vma->vm_mm->uprobes_state.count); + } + + if (!ret) + count++; } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); + if (ret) + atomic_sub(count, &vma->vm_mm->uprobes_state.count); + return ret; } +/* + * Called in context of a munmap of a vma. + */ +void uprobe_munmap(struct vm_area_struct *vma) +{ + struct list_head tmp_list; + struct uprobe *uprobe, *u; + struct inode *inode; + + if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) + return; + + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) + return; + + inode = vma->vm_file->f_mapping->host; + if (!inode) + return; + + INIT_LIST_HEAD(&tmp_list); + mutex_lock(uprobes_mmap_hash(inode)); + build_probe_list(inode, &tmp_list); + + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + loff_t vaddr; + + list_del(&uprobe->pending_list); + vaddr = vma_address(vma, uprobe->offset); + + if (vaddr >= vma->vm_start && vaddr < vma->vm_end) { + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); + } + put_uprobe(uprobe); + } + mutex_unlock(uprobes_mmap_hash(inode)); +} + /* Slot allocation for XOL */ static int xol_add_vma(struct xol_area *area) { @@ -1150,6 +1251,7 @@ void uprobe_clear_state(struct mm_struct *mm) void uprobe_reset_state(struct mm_struct *mm) { mm->uprobes_state.xol_area = NULL; + atomic_set(&mm->uprobes_state.count, 0); } /* @@ -1504,7 +1606,8 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask; - if (!current->mm) + if (!current->mm || !atomic_read(¤t->mm->uprobes_state.count)) + /* task is currently not uprobed */ return 0; utask = current->utask; diff --git a/kernel/fork.c b/kernel/fork.c index 3133b9da59d5..26a8f5c25805 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -421,6 +421,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + + if (file && uprobe_mmap(tmp)) + goto out; } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); diff --git a/mm/mmap.c b/mm/mmap.c index 5a863d328a44..7c112fbca405 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -218,6 +218,7 @@ void unlink_file_vma(struct vm_area_struct *vma) mutex_lock(&mapping->i_mmap_mutex); __remove_shared_vm_struct(vma, file, mapping); mutex_unlock(&mapping->i_mmap_mutex); + uprobe_munmap(vma); } } @@ -546,8 +547,14 @@ again: remove_next = 1 + (end > next->vm_end); if (file) { mapping = file->f_mapping; - if (!(vma->vm_flags & VM_NONLINEAR)) + if (!(vma->vm_flags & VM_NONLINEAR)) { root = &mapping->i_mmap; + uprobe_munmap(vma); + + if (adjust_next) + uprobe_munmap(next); + } + mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* @@ -626,6 +633,7 @@ again: remove_next = 1 + (end > next->vm_end); if (remove_next) { if (file) { + uprobe_munmap(next); fput(file); if (next->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(mm);