Message ID | 20240711110400.635302571@infradead.org (mailing list archive)
---|---
State | Handled Elsewhere
Series | perf/uprobe: Optimize uprobes
On Thu, 11 Jul 2024 13:02:39 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> With handle_swbp() triggering concurrently on (all) CPUs, tree_lock
> becomes a bottleneck. Avoid treelock by doing RCU lookups of the
> uprobe.
>

Looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Thanks,

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/events/uprobes.c | 49 +++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 9 deletions(-)
>
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -40,6 +40,7 @@ static struct rb_root uprobes_tree = RB_
>  #define no_uprobe_events()	RB_EMPTY_ROOT(&uprobes_tree)
>
>  static DEFINE_RWLOCK(uprobes_treelock);	/* serialize rbtree access */
> +static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
>
>  #define UPROBES_HASH_SZ	13
>  /* serialize uprobe->pending_list */
> @@ -54,6 +55,7 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem)
>  struct uprobe {
>  	struct rb_node		rb_node;	/* node in the rb tree */
>  	refcount_t		ref;
> +	struct rcu_head		rcu;
>  	struct rw_semaphore	register_rwsem;
>  	struct rw_semaphore	consumer_rwsem;
>  	struct list_head	pending_list;
> @@ -587,12 +589,25 @@ set_orig_insn(struct arch_uprobe *auprob
>  			*(uprobe_opcode_t *)&auprobe->insn);
>  }
>
> +static struct uprobe *try_get_uprobe(struct uprobe *uprobe)
> +{
> +	if (refcount_inc_not_zero(&uprobe->ref))
> +		return uprobe;
> +	return NULL;
> +}
> +
>  static struct uprobe *get_uprobe(struct uprobe *uprobe)
>  {
>  	refcount_inc(&uprobe->ref);
>  	return uprobe;
>  }
>
> +static void uprobe_free_rcu(struct rcu_head *rcu)
> +{
> +	struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
> +	kfree(uprobe);
> +}
> +
>  static void put_uprobe(struct uprobe *uprobe)
>  {
>  	if (refcount_dec_and_test(&uprobe->ref)) {
> @@ -604,7 +619,7 @@ static void put_uprobe(struct uprobe *up
>  		mutex_lock(&delayed_uprobe_lock);
>  		delayed_uprobe_remove(uprobe, NULL);
>  		mutex_unlock(&delayed_uprobe_lock);
> -		kfree(uprobe);
> +		call_rcu(&uprobe->rcu, uprobe_free_rcu);
>  	}
>  }
>
> @@ -653,10 +668,10 @@ static struct uprobe *__find_uprobe(stru
>  		.inode = inode,
>  		.offset = offset,
>  	};
> -	struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
> +	struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key);
>
>  	if (node)
> -		return get_uprobe(__node_2_uprobe(node));
> +		return try_get_uprobe(__node_2_uprobe(node));
>
>  	return NULL;
>  }
> @@ -667,20 +682,32 @@ static struct uprobe *__find_uprobe(stru
>   */
>  static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
>  {
> -	struct uprobe *uprobe;
> +	unsigned int seq;
>
> -	read_lock(&uprobes_treelock);
> -	uprobe = __find_uprobe(inode, offset);
> -	read_unlock(&uprobes_treelock);
> +	guard(rcu)();
>
> -	return uprobe;
> +	do {
> +		seq = read_seqcount_begin(&uprobes_seqcount);
> +		struct uprobe *uprobe = __find_uprobe(inode, offset);
> +		if (uprobe) {
> +			/*
> +			 * Lockless RB-tree lookups are prone to false-negatives.
> +			 * If they find something, it's good. If they do not find,
> +			 * it needs to be validated.
> +			 */
> +			return uprobe;
> +		}
> +	} while (read_seqcount_retry(&uprobes_seqcount, seq));
> +
> +	/* Really didn't find anything. */
> +	return NULL;
>  }
>
>  static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
>  {
>  	struct rb_node *node;
>
> -	node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
> +	node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
>  	if (node)
>  		return get_uprobe(__node_2_uprobe(node));
>
> @@ -702,7 +729,9 @@ static struct uprobe *insert_uprobe(stru
>  	struct uprobe *u;
>
>  	write_lock(&uprobes_treelock);
> +	write_seqcount_begin(&uprobes_seqcount);
>  	u = __insert_uprobe(uprobe);
> +	write_seqcount_end(&uprobes_seqcount);
>  	write_unlock(&uprobes_treelock);
>
>  	return u;
> @@ -936,7 +965,9 @@ static void delete_uprobe(struct uprobe
>  		return;
>
>  	write_lock(&uprobes_treelock);
> +	write_seqcount_begin(&uprobes_seqcount);
>  	rb_erase(&uprobe->rb_node, &uprobes_tree);
> +	write_seqcount_end(&uprobes_seqcount);
>  	write_unlock(&uprobes_treelock);
>  	RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
>  	put_uprobe(uprobe);
>
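As background for the lookup-side change: once lookups run under RCU instead of the read lock, an object found in the tree may already have dropped its last reference, so the lookup can only take a reference with refcount_inc_not_zero(), and the final kfree() has to be deferred past a grace period with call_rcu(). A minimal kernel-style sketch of that lifetime rule, using a hypothetical struct obj rather than the uprobes code itself:

/*
 * Illustrative sketch only -- 'struct obj' is hypothetical, not uprobes.
 * It shows the try-get / RCU-deferred-free discipline the patch applies.
 */
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
	refcount_t	ref;
	struct rcu_head	rcu;
};

static void obj_free_rcu(struct rcu_head *rcu)
{
	/* Runs after a grace period: no RCU reader can still hold a pointer. */
	kfree(container_of(rcu, struct obj, rcu));
}

static void obj_put(struct obj *obj)
{
	if (refcount_dec_and_test(&obj->ref))
		call_rcu(&obj->rcu, obj_free_rcu);	/* never kfree() directly */
}

/* Called under rcu_read_lock(), on an object found in an RCU-safe structure. */
static struct obj *obj_try_get(struct obj *obj)
{
	/*
	 * The memory is still valid, but the refcount may already be zero
	 * with the free pending in call_rcu().  A plain refcount_inc() would
	 * resurrect such an object; inc_not_zero() fails instead, making the
	 * lookup behave as "not found".
	 */
	if (obj && refcount_inc_not_zero(&obj->ref))
		return obj;
	return NULL;
}

This is why __find_uprobe() above switches from get_uprobe() to try_get_uprobe(), and put_uprobe() switches from kfree() to call_rcu().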
--- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -40,6 +40,7 @@ static struct rb_root uprobes_tree = RB_ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ +static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ @@ -54,6 +55,7 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem) struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; + struct rcu_head rcu; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; @@ -587,12 +589,25 @@ set_orig_insn(struct arch_uprobe *auprob *(uprobe_opcode_t *)&auprobe->insn); } +static struct uprobe *try_get_uprobe(struct uprobe *uprobe) +{ + if (refcount_inc_not_zero(&uprobe->ref)) + return uprobe; + return NULL; +} + static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } +static void uprobe_free_rcu(struct rcu_head *rcu) +{ + struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); + kfree(uprobe); +} + static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { @@ -604,7 +619,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - kfree(uprobe); + call_rcu(&uprobe->rcu, uprobe_free_rcu); } } @@ -653,10 +668,10 @@ static struct uprobe *__find_uprobe(stru .inode = inode, .offset = offset, }; - struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); + struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key); if (node) - return get_uprobe(__node_2_uprobe(node)); + return try_get_uprobe(__node_2_uprobe(node)); return NULL; } @@ -667,20 +682,32 @@ static struct uprobe *__find_uprobe(stru */ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { - struct uprobe *uprobe; + unsigned int seq; - read_lock(&uprobes_treelock); - uprobe = __find_uprobe(inode, offset); - read_unlock(&uprobes_treelock); + guard(rcu)(); - return uprobe; + do { + seq = read_seqcount_begin(&uprobes_seqcount); + struct uprobe *uprobe = __find_uprobe(inode, offset); + if (uprobe) { + /* + * Lockless RB-tree lookups are prone to false-negatives. + * If they find something, it's good. If they do not find, + * it needs to be validated. + */ + return uprobe; + } + } while (read_seqcount_retry(&uprobes_seqcount, seq)); + + /* Really didn't find anything. */ + return NULL; } static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; - node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); + node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) return get_uprobe(__node_2_uprobe(node)); @@ -702,7 +729,9 @@ static struct uprobe *insert_uprobe(stru struct uprobe *u; write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); u = __insert_uprobe(uprobe); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); return u; @@ -936,7 +965,9 @@ static void delete_uprobe(struct uprobe return; write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); rb_erase(&uprobe->rb_node, &uprobes_tree); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ put_uprobe(uprobe);
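On the write side, tree modifications still take uprobes_treelock; the new seqcount, bound to that rwlock by SEQCNT_RWLOCK_ZERO(), is what lets a reader tell whether a writer ran while it searched. That matters because a concurrent rebalance can make a lockless RB-tree walk miss a node that is actually present, so only a negative result needs revalidation. A condensed sketch of the pairing, with hypothetical names (mytree_lock, mytree_seq, lookup_locklessly()) standing in for the uprobes ones:

/*
 * Illustrative sketch of pairing a seqcount with an existing rwlock, as the
 * patch does with uprobes_treelock/uprobes_seqcount.  All names here are
 * hypothetical; lookup_locklessly() stands in for the RCU-safe tree walk.
 */
#include <linux/rcupdate.h>
#include <linux/seqlock.h>
#include <linux/spinlock.h>

static DEFINE_RWLOCK(mytree_lock);
static seqcount_rwlock_t mytree_seq = SEQCNT_RWLOCK_ZERO(mytree_seq, &mytree_lock);

void *lookup_locklessly(void);			/* hypothetical rb_find_rcu()-style search */

static void tree_modify(void)
{
	write_lock(&mytree_lock);
	write_seqcount_begin(&mytree_seq);	/* readers can now detect the update */
	/* ... insert/erase nodes ... */
	write_seqcount_end(&mytree_seq);
	write_unlock(&mytree_lock);
}

static void *tree_lookup(void)
{
	unsigned int seq;
	void *ret;

	rcu_read_lock();
	do {
		seq = read_seqcount_begin(&mytree_seq);
		ret = lookup_locklessly();
		if (ret)
			break;			/* positive hits never need a retry */
		/* Not found: retry only if a writer ran while we searched. */
	} while (read_seqcount_retry(&mytree_seq, seq));
	rcu_read_unlock();

	return ret;
}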
With handle_swbp() triggering concurrently on (all) CPUs, tree_lock becomes a bottleneck. Avoid treelock by doing RCU lookups of the uprobe. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> --- kernel/events/uprobes.c | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-)
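One more construct worth calling out: guard(rcu)() in the new find_uprobe() comes from the scope-based cleanup helpers (linux/cleanup.h, with the rcu guard defined alongside the RCU API). It enters an RCU read-side critical section at the declaration and leaves it automatically on every exit from the scope, which is why the retry loop can simply return in the middle without an explicit rcu_read_unlock(). A tiny hypothetical example (struct foo and find_foo_rcu() are made up for illustration):

/*
 * Illustration only: 'struct foo' and find_foo_rcu() are hypothetical.
 * guard(rcu)() takes rcu_read_lock() here, and rcu_read_unlock() runs
 * automatically on every return path out of this function.
 */
#include <linux/cleanup.h>
#include <linux/rcupdate.h>

struct foo;
struct foo *find_foo_rcu(void);		/* hypothetical RCU-safe lookup */

static struct foo *lookup_example(void)
{
	guard(rcu)();			/* rcu_read_lock() from here on */

	struct foo *f = find_foo_rcu();
	if (!f)
		return NULL;		/* rcu_read_unlock() happens here... */
	return f;			/* ...and here */
}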