From patchwork Thu Jul 11 11:02:36 2024
X-Patchwork-Submitter: Peter Zijlstra
X-Patchwork-Id: 13730363
Message-Id: <20240711110400.309670567@infradead.org>
Date: Thu, 11 Jul 2024 13:02:36 +0200
From: Peter Zijlstra
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 01/11] perf/uprobe: Re-indent labels
References: <20240711110235.098009979@infradead.org>

Remove the silly label indenting.
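(The sed expression below describes the mechanical transformation: it matches lines consisting of a single leading space, an alphanumeric label and a trailing colon, and drops the space, so that e.g. " out:" becomes "out:".)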
s/^\ \([[:alnum:]]*\):$/\1:/g Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -205,7 +205,7 @@ static int __replace_page(struct vm_area folio_put(old_folio); err = 0; - unlock: +unlock: mmu_notifier_invalidate_range_end(&range); folio_unlock(old_folio); return err; @@ -857,7 +857,7 @@ static int prepare_uprobe(struct uprobe smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); - out: +out: up_write(&uprobe->consumer_rwsem); return ret; @@ -965,7 +965,7 @@ build_map_info(struct address_space *map struct map_info *info; int more = 0; - again: +again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) @@ -1019,7 +1019,7 @@ build_map_info(struct address_space *map } while (--more); goto again; - out: +out: while (prev) prev = free_map_info(prev); return curr; @@ -1068,13 +1068,13 @@ register_for_each_vma(struct uprobe *upr err |= remove_breakpoint(uprobe, mm, info->vaddr); } - unlock: +unlock: mmap_write_unlock(mm); - free: +free: mmput(mm); info = free_map_info(info); } - out: +out: percpu_up_write(&dup_mmap_sem); return err; } @@ -1159,7 +1159,7 @@ static int __uprobe_register(struct inod if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) return -EINVAL; - retry: +retry: uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; @@ -1468,7 +1468,7 @@ static int xol_add_vma(struct mm_struct ret = 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ - fail: +fail: mmap_write_unlock(mm); return ret; @@ -1512,7 +1512,7 @@ static struct xol_area *__create_xol_are kfree(area->bitmap); free_area: kfree(area); - out: +out: return NULL; } @@ -1915,7 +1915,7 @@ static void prepare_uretprobe(struct upr utask->return_instances = ri; return; - fail: +fail: kfree(ri); } @@ -2031,7 +2031,7 @@ static int is_trap_at_addr(struct mm_str copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); - out: +out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } @@ -2159,7 +2159,7 @@ static void handle_trampoline(struct pt_ utask->return_instances = ri; return; - sigill: +sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); From patchwork Thu Jul 11 11:02:37 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730367 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BBC9D15A87B; Thu, 11 Jul 2024 11:07:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; cv=none; b=ttvgfvzEiEXeE2nkz3z71Cpm4BBU3didiRWVBkaa1/sDnxfz+Iuw8x92VcSZEdO+QKz4owLUwiBEzZZGGE6cM/bzQAsqqxjsqG4qoZuUuOsGh/uXfV3dCgRoy1tNnGuqc273gCxH8o6gWpzBrtQwjsAgx5lWbmAzavFopRGBolA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; c=relaxed/simple; bh=aHcz8je/K6D8qyRMNtQ30mbLo3y2E4SL+2Pr8zv5Wis=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: 
Content-Type; b=ssdiSucmM9Dn4elRpCVskQdSb1c7Nb9neOpnOn53848mForLFKzH+HcWFsACiMbDrc679AdVD6/WttGTUXUU0icI44Q9JO37UnLt3qOjvL6zrjjF6ZxQdtX65PMt9yhiJjEfP+CgCuYFzOTSmgCoLq3lWZ0qEQUPVtBnH02J7LE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=qJ/k11So; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="qJ/k11So" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=1bUUAiw4/p70dXaUe2S6wv8W5kuKSZvbGkuAwwX7C3c=; b=qJ/k11SoQ7qwRI0yYeCmgokzqd h2ProHEQ51A14hrd289eUmBOztF+sg9LYyeOmtMaKRaxxA8m0yqPxxV1bRLZCiQ/UD6IR85lywXoN WbVkrFg891iAxbWj/JKfAjo2r64K9lyis+BD3N02Z13fsEAuaEq4f1A/8G86pLbX1K4C65qsZWev1 V8yXtEHEFY8xqKuC7N8uUBJy50Hq2gzKTOCMHxHz1SDS8EDbgifzbEQV1tEey2aIIQN+mK0PKhiNs OhVJY4q4wIEXJUmbylQFTmcB7KfzxwDGYYw/a18Pq4Na00Q05QJWBbEmSyGQjWHIGs9Gylerd03km N7tUc1Rw==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRrdz-000000015VR-1nLT; Thu, 11 Jul 2024 11:07:23 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id B8911300CBA; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110400.421616631@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:37 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH v2 02/11] perf/uprobe: Remove spurious whitespace References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -67,7 +67,7 @@ struct uprobe { * The generic code assumes that it has two members of unknown type * owned by the arch-specific code: * - * insn - copy_insn() saves the original instruction here for + * insn - copy_insn() saves the original instruction here for * arch_uprobe_analyze_insn(). * * ixol - potentially modified instruction to execute out of @@ -95,18 +95,18 @@ static LIST_HEAD(delayed_uprobe_list); * allocated. */ struct xol_area { - wait_queue_head_t wq; /* if all slots are busy */ - atomic_t slot_count; /* number of in-use slots */ - unsigned long *bitmap; /* 0 = free slot */ + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 = free slot */ struct vm_special_mapping xol_mapping; - struct page *pages[2]; + struct page *pages[2]; /* * We keep the vma's vm_start rather than a pointer to the vma * itself. 
The probed process or a naughty kernel module could make * the vma go away, and we must handle that reasonably gracefully. */ - unsigned long vaddr; /* Page(s) of instruction slots */ + unsigned long vaddr; /* Page(s) of instruction slots */ }; /* From patchwork Thu Jul 11 11:02:38 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730369 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BF95A15B0F0; Thu, 11 Jul 2024 11:07:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; cv=none; b=KyUQAUU1CmreE0NTj1Xx0uJKHFzO+hWCRjHBTd1H5rWg6n5qHzGJJyXUrcuobSHcatSRP1piTyeUM2jTPSGXyyWoQg76AlXlAcsy0SLFsDToW+k5NKBBL6+K7e6IBQq6e5oDW1BdHQ7DgeSRSmn+2jS9vK2JexuTTsZDS4anj4c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; c=relaxed/simple; bh=nf9OJKCU1+T+PW3uEaHa/oMWagtwoT8LK4SH6ZDVVn4=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=Ii8sTCHxQS6YbgAkb44S4fqRwuOtweFEgpchi0qFb+PgAKeb08WHpBHlxuBr4MzxVyfj6K0N8jL/3YOApHUJxw6M/KHYIzqFEbHogSOAqhHlqMcWwnfHJjcgov46DCmnnKdANKH4SDXqt6l2a+dBDGtrlvnkPudF6guolmnBkLs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=pIYoSkuG; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="pIYoSkuG" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=sauT9IQp+lYdD7ir99ipRaTNhTpKArZ3iIMwq8bpAT8=; b=pIYoSkuGCeGYIdMvoUZ2X5zTQW VJN06xf/AsY2N+6fFwfab8OnR8ZeRCcrxM28RDAA13JmSgxFitOvJaCVt/o/uc304ghe1pZkNF8CO IF84HqbhFLmQLtRheE88k84Kt7P2bWZJLJ9ADsc1HnqdI5LZvQ06ucIVl1+028bUp25zUZ1Xb/nzh 98LA82KFwKUiZJ+LKWzf6AmOQiyJBFnQVXFsbaB/MGOm4baztVUnjlbPpiCFXtVL9bXB+Wfx77T9O Urj51FyPUDD0dFNqEQhZ6WpLIfa3ZVT2hfOHo2eacG0aue3hmyy5A45m08wQna9tKCPByVNDC+dF4 4ZgkfSAA==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRrdz-0000000Ax6S-23mQ; Thu, 11 Jul 2024 11:07:23 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id BC9F7300E46; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110400.529465037@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:38 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH v2 03/11] rbtree: Provide rb_find_rcu() 
/ rb_find_add_rcu() References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Much like latch_tree, add two RCU methods for the regular RB-tree, which can be used in conjunction with a seqcount to provide lockless lookups. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) --- include/linux/rbtree.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -245,6 +245,42 @@ rb_find_add(struct rb_node *node, struct } /** + * rb_find_add_rcu() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Adds a Store-Release for link_node. + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + +/** * rb_find() - find @key in tree @tree * @key: key to match * @tree: tree to search @@ -268,6 +304,37 @@ rb_find(const void *key, const struct rb else return node; } + + return NULL; +} + +/** + * rb_find_rcu() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Notably, tree descent vs concurrent tree rotations is unsound and can result + * in false-negatives. + * + * Returns the rb_node matching @key or NULL. 
+ */ +static __always_inline struct rb_node * +rb_find_rcu(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return node; + } return NULL; } From patchwork Thu Jul 11 11:02:39 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730362 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BF90515A874; Thu, 11 Jul 2024 11:07:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; cv=none; b=lA3a6RL2A2p/hppTrGcnWG5srawKTGIB6zbjwFh6BXOcil3Cl1nS78DniKOgzSCN9z1n/Xv89ASpsxHdAEzCQHSjt4cs3drmVKRmJvmd41nwuiJ3fqz0aaGrwd5R4knb3fTp1D7KMAClDsjibW4f0ajsweqO9hixmTWnpDiV/Vo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; c=relaxed/simple; bh=namzImpKkAE0SeDauxvgo7e0qpH+mTqxwz/WBAvffQw=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=ZEItV1K+XTJHl5zqLY8VVA1sbuiViadYboaXI3fEm9TBpTa36H7ux+jQk7Zb64uYrxIK93RqfJ/CKTfNl5l8zi2NJH889IuUDdoGRSkWOHMwJK8qRSoF+pVeq6qr/wCtEOyhzF0iyiTLO6fN1TC++GQkGkg9OoMSrOKIPqzIkCo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=j9RYO7cl; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="j9RYO7cl" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=6enVwtxrhKY9z473ycfT5rwTE1B57qqsh+H+nST3cXo=; b=j9RYO7clZB4u3MSfWuo6QarfqL g3fesYOQpUjOrvUFPJSaLKFf3CoX9IcSzasNr3plfgpWx33Tu87SOV3mtZR8drNmygV0azoTNQk3U o/krgOwILVlvsXGUAUrWnpX0omF1MKpcbTCCy92kiJyf3AZYG0JEMO7KF/yDSmih9y/bfMEyt2XOh 4rmL/+GlqJtbPYMBGpyJqCU510Avjaz8RXDt2xW41+2PnPpzEMbSD1eB40M9J3v/UNM4Q/gkaQ/Be gJ811YzpFRsvZMOW8MfIZ1i90lMDBJcEv4bbdVEt10VuoqOBnAmZND8ksrP7Vjb+1phnH1PQIwRr9 /fxd45Dw==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRrdz-0000000Ax6R-22mg; Thu, 11 Jul 2024 11:07:23 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id C1463302182; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110400.635302571@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:39 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, 
peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH v2 04/11] perf/uprobe: RCU-ify find_uprobe() References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 With handle_swbp() triggering concurrently on (all) CPUs, tree_lock becomes a bottleneck. Avoid treelock by doing RCU lookups of the uprobe. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) --- kernel/events/uprobes.c | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -40,6 +40,7 @@ static struct rb_root uprobes_tree = RB_ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ +static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ @@ -54,6 +55,7 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem) struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; + struct rcu_head rcu; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; @@ -587,12 +589,25 @@ set_orig_insn(struct arch_uprobe *auprob *(uprobe_opcode_t *)&auprobe->insn); } +static struct uprobe *try_get_uprobe(struct uprobe *uprobe) +{ + if (refcount_inc_not_zero(&uprobe->ref)) + return uprobe; + return NULL; +} + static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } +static void uprobe_free_rcu(struct rcu_head *rcu) +{ + struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); + kfree(uprobe); +} + static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { @@ -604,7 +619,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - kfree(uprobe); + call_rcu(&uprobe->rcu, uprobe_free_rcu); } } @@ -653,10 +668,10 @@ static struct uprobe *__find_uprobe(stru .inode = inode, .offset = offset, }; - struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); + struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key); if (node) - return get_uprobe(__node_2_uprobe(node)); + return try_get_uprobe(__node_2_uprobe(node)); return NULL; } @@ -667,20 +682,32 @@ static struct uprobe *__find_uprobe(stru */ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { - struct uprobe *uprobe; + unsigned int seq; - read_lock(&uprobes_treelock); - uprobe = __find_uprobe(inode, offset); - read_unlock(&uprobes_treelock); + guard(rcu)(); - return uprobe; + do { + seq = read_seqcount_begin(&uprobes_seqcount); + struct uprobe *uprobe = __find_uprobe(inode, offset); + if (uprobe) { + /* + * Lockless RB-tree lookups are prone to false-negatives. + * If they find something, it's good. If they do not find, + * it needs to be validated. + */ + return uprobe; + } + } while (read_seqcount_retry(&uprobes_seqcount, seq)); + + /* Really didn't find anything. 
*/ + return NULL; } static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; - node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); + node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) return get_uprobe(__node_2_uprobe(node)); @@ -702,7 +729,9 @@ static struct uprobe *insert_uprobe(stru struct uprobe *u; write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); u = __insert_uprobe(uprobe); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); return u; @@ -936,7 +965,9 @@ static void delete_uprobe(struct uprobe return; write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); rb_erase(&uprobe->rb_node, &uprobes_tree); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ put_uprobe(uprobe); From patchwork Thu Jul 11 11:02:40 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730359 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DE50D15A865; Thu, 11 Jul 2024 11:07:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696049; cv=none; b=qE3id0b3dZa+GkwIMpF+pxQLRNzsPowXJ74X/ppUzoIZbjTGc8S1vBltniXmGL1xnQ6U4WX79ZoiCVSwDqN0ZcU+QoXiKQ5BMQAOmAgZ6xj1CqXg2gmNvLkBg02B7X5O7aujqjGQqD/haaWpcgMejTJr+9rJXng4rIkTGBcESag= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696049; c=relaxed/simple; bh=SK0WpGxdLIMwlY7T1cLR+yWEBYicF8l1blnSe1p7LCU=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=jGuH/uhYd8fXaGrA/OVBCkindRsFIrBhqaEj/dzhPjEaAjK1xktE0JKxstWZYJHm08X+oBiqV6Bc12SUmuAvoRmiuG+5ODXvUFKZ8XfFOQb0vqFKyVc+UWDwUSMaDpk3IBJ2qKiId+sDoTekI/Vpgyc59BFNZnIlZ2znBGayPvU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=ZeH51iyO; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="ZeH51iyO" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=VqlAJfIzp9p6gk1mqKZ345ZWdiXBhJmcK02is9mdROM=; b=ZeH51iyOE2w8ALI6fMUsjNe25t MnKaYe0b1r5INsD7cEdQlbKjk4uMKau6MaN/V+YCDeyanwYFaf/XkMCl1LHa9qhULfiVd0XPfVxrw 19tdvPaX+z/iQhCN9SGOzYTaJBUwAO51CCohhlEMzFXFY6upXhdRdi5DMWGpXZHCJpZuM9MhRnAcq T2x35WAWZ4Q+BDp0Ns9bRcTIZgFG3m4aCg5ZKs2u834LcRPDKFHsZxQDe4BtSr7xFVBNoQSfZgjua t4t6El5MTGUPRZg2/VX1dEh5K1VI2VxaXug8JclihLdZQEXW0uWdWiaTHaga+PzTyK3OEZhVEqBEZ HOwsR9ug==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with 
esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRre0-0000000Ax6X-08o2; Thu, 11 Jul 2024 11:07:24 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id C53E43021D4; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110400.768061729@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:40 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH v2 05/11] perf/uprobe: Simplify UPROBE_HANDLER_REMOVE logic References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Specifically, get rid of the uprobe->consumers re-load, which isn't sound under RCU. Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2101,6 +2101,7 @@ static void handler_chain(struct uprobe struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; bool need_prep = false; /* prepare return uprobe, when needed */ + bool had_handler = false; down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { @@ -2115,16 +2116,26 @@ static void handler_chain(struct uprobe if (uc->ret_handler) need_prep = true; + /* + * A single handler that does not mask out REMOVE, means the + * probe stays. + */ + had_handler = true; remove &= rc; } + /* + * If there were no handlers called, nobody asked for it to be removed + * but also nobody got to mask the value. Fix it up. 
+ */ + if (!had_handler) + remove = 0; + if (need_prep && !remove) prepare_uretprobe(uprobe, regs); /* put bp at return */ - if (remove && uprobe->consumers) { - WARN_ON(!uprobe_is_active(uprobe)); + if (remove) unapply_uprobe(uprobe, current->mm); - } up_read(&uprobe->register_rwsem); } From patchwork Thu Jul 11 11:02:41 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730364 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 982F515B0E6; Thu, 11 Jul 2024 11:07:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; cv=none; b=rntd83IU0rTTd9xqmwuarZ1insESjK3dvzVYduuu4o+oBdl7s1ydosGo6OpMx0mcv6tQhvl6sFKrrYZAdVL+GrrBs/Aq9pC7uv3eqSSRGPDc906y0WMJDWqotlMLgCFNK6+P6Dy+vSfS2C4et6bMzPolApe0RZS/SNhR2qJgbJ0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; c=relaxed/simple; bh=V4VGBP/Aj8yPyI/IWKopehT3xnqmyYE9lEkZlZ4eNbc=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=CxQRaPL7NqtCOTl5cTIh8978Wd++3wuWyZx5LegPT9q5qyDFMBPZg5EKeIvtibLhtMF75j3+J2bSJpFy06W+m4HEqe0ATFvjiCfWZkAGE6frv+vvA0PlLJl3mZwfhlZdGq9pS7wLmDRgdP6D4/ivI77scr5mLxtDgCW7LkgMQGQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=cBTbgars; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="cBTbgars" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=Ny8uqMTS6OmqyT/DzZEZHx/QBmd5E9Ugn7LCsaDF+b4=; b=cBTbgars7f8Y40nO86kVz5kgFX AgtvFMckaBT6iy89IDDORTrYKjmhfn+EZfVb6ROcfoC6nGC4uEsFavIPIuP8/iuyEIRpGYIqelkdN Bl+dQU0eIW5wxRSaBvCUBJPx0XvXLsTLFg4jHBSzPgeQNXdf/CYIdj0F7ESkrbp1kItLagN4w8viL bkO4we6zguzEkuaTTI4xzzU7OHlNklE92xbyqjAFdtuyxpm1zDmbVmLH21QlGLeTpfPGDvnTYv6IU BL260nUD937hh2kcSb/v3JuKxIgewiKoudgsJ0Spz7YVlW5n5Sg8o8mHmaRODFV5F7rak5MjDYUw+ gKaQ4LqA==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRre0-000000015VV-0GeD; Thu, 11 Jul 2024 11:07:24 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id CA50A302440; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110400.880800153@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:41 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: 
[PATCH v2 06/11] perf/uprobe: SRCU-ify uprobe->consumer list References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 With handle_swbp() hitting concurrently on (all) CPUs the uprobe->register_rwsem can get very contended. Add an SRCU instance to cover the consumer list and consumer lifetime. Since the consumer are externally embedded structures, unregister will have to suffer a synchronize_srcu(). A notably complication is the UPROBE_HANDLER_REMOVE logic which can race against uprobe_register() such that it might want to remove a freshly installer handler that didn't get called. In order to close this hole, a seqcount is added. With that, the removal path can tell if anything changed and bail out of the removal. Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 60 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 10 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -49,6 +50,11 @@ static struct mutex uprobes_mmap_mutex[U DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); +/* + * Covers uprobe->consumers lifetime. + */ +DEFINE_STATIC_SRCU(uprobes_srcu); + /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 @@ -57,6 +63,7 @@ struct uprobe { refcount_t ref; struct rcu_head rcu; struct rw_semaphore register_rwsem; + seqcount_t register_seq; struct rw_semaphore consumer_rwsem; struct list_head pending_list; struct uprobe_consumer *consumers; @@ -760,6 +767,7 @@ static struct uprobe *alloc_uprobe(struc uprobe->offset = offset; uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); + seqcount_init(&uprobe->register_seq); init_rwsem(&uprobe->consumer_rwsem); /* add to uprobes_tree, sorted on inode:offset */ @@ -782,8 +790,8 @@ static struct uprobe *alloc_uprobe(struc static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); - uc->next = uprobe->consumers; - uprobe->consumers = uc; + WRITE_ONCE(uc->next, uprobe->consumers); + rcu_assign_pointer(uprobe->consumers, uc); up_write(&uprobe->consumer_rwsem); } @@ -800,7 +808,7 @@ static bool consumer_del(struct uprobe * down_write(&uprobe->consumer_rwsem); for (con = &uprobe->consumers; *con; con = &(*con)->next) { if (*con == uc) { - *con = uc->next; + WRITE_ONCE(*con, uc->next); ret = true; break; } @@ -1139,9 +1147,13 @@ void uprobe_unregister(struct inode *ino return; down_write(&uprobe->register_rwsem); + raw_write_seqcount_begin(&uprobe->register_seq); __uprobe_unregister(uprobe, uc); + raw_write_seqcount_end(&uprobe->register_seq); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); + + synchronize_srcu(&uprobes_srcu); } EXPORT_SYMBOL_GPL(uprobe_unregister); @@ -1204,10 +1216,12 @@ static int __uprobe_register(struct inod down_write(&uprobe->register_rwsem); ret = -EAGAIN; if (likely(uprobe_is_active(uprobe))) { + raw_write_seqcount_begin(&uprobe->register_seq); consumer_add(uprobe, uc); ret = register_for_each_vma(uprobe, uc); if (ret) __uprobe_unregister(uprobe, uc); + raw_write_seqcount_end(&uprobe->register_seq); } up_write(&uprobe->register_rwsem); put_uprobe(uprobe); @@ -1250,10 +1264,12 @@ int uprobe_apply(struct inode *inode, lo return ret; down_write(&uprobe->register_rwsem); + raw_write_seqcount_begin(&uprobe->register_seq); for (con = uprobe->consumers; con && con != uc ; con = 
con->next) ; if (con) ret = register_for_each_vma(uprobe, add ? uc : NULL); + raw_write_seqcount_end(&uprobe->register_seq); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); @@ -2096,15 +2112,23 @@ static struct uprobe *find_active_uprobe return uprobe; } +#define for_each_consumer_rcu(pos, head) \ + for (pos = rcu_dereference_raw(head); pos; \ + pos = rcu_dereference_raw(pos->next)) + static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; bool need_prep = false; /* prepare return uprobe, when needed */ bool had_handler = false; + unsigned int seq; - down_read(&uprobe->register_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { + guard(srcu)(&uprobes_srcu); + + seq = raw_read_seqcount_begin(&uprobe->register_seq); + + for_each_consumer_rcu(uc, uprobe->consumers) { int rc = 0; if (uc->handler) { @@ -2134,9 +2158,25 @@ static void handler_chain(struct uprobe if (need_prep && !remove) prepare_uretprobe(uprobe, regs); /* put bp at return */ - if (remove) + if (remove) { + /* + * Removing uprobes is a slow path, after all, the more probes + * you remove, the less probe hits you get. + * + * This needs to serialize against uprobe_register(), such that + * if the above RCU iteration missed a new handler that + * would've liked to keep the probe, we don't go uninstall the + * probe after it already ran register_for_each_vma(). + * + * The rwsem ensures exclusivity against uprobe_register() + * while the seqcount will avoid the removal if anything has + * changed since we started. + */ + guard(rwsem_read)(&uprobe->register_rwsem); + if (read_seqcount_retry(&uprobe->register_seq, seq)) + return; unapply_uprobe(uprobe, current->mm); - up_read(&uprobe->register_rwsem); + } } static void @@ -2145,12 +2185,12 @@ handle_uretprobe_chain(struct return_ins struct uprobe *uprobe = ri->uprobe; struct uprobe_consumer *uc; - down_read(&uprobe->register_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { + guard(srcu)(&uprobes_srcu); + + for_each_consumer_rcu(uc, uprobe->consumers) { if (uc->ret_handler) uc->ret_handler(uc, ri->func, regs); } - up_read(&uprobe->register_rwsem); } static struct return_instance *find_next_ret_chain(struct return_instance *ri) From patchwork Thu Jul 11 11:02:42 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730368 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 803ED15A858; Thu, 11 Jul 2024 11:07:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; cv=none; b=PjjheLIB8aaao8vKIzX83ZOqTsO95Sz5SIYbRdYoHG5kV8scZS9mR8fhXZ39gwwaDVhil1sNAgdfAD2Ak1A0491IKpciNagSnBEnOgvWHNZoKghX+3f+SAGpH6vj4An8NLooTtM8Ol6pAwX8ZnKLgAI1rZUhrfsf20ZTXEvjVMQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696051; c=relaxed/simple; bh=yWhZob/BPdfmsu4sMDxRN0GS62Spdk2BlA3DKMfmucc=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=Jimirk//bTdQFoNkD4hSrOlyvpLtIrVRGTHM60rHagRKhHVAzhPv35HyqsJ4XMtS7nmv1Ndw5A8AMvT6jW6SN+BVg0hsZkQaMLHZcpecstjbQ9vXbP6cvWb/cvmVKv8WC+LJ9cnj+VqObyTqQUeaGUNYm4253kgk3a6Mhy7dr3s= ARC-Authentication-Results: i=1; 
Message-Id: <20240711110400.987380024@infradead.org>
Date: Thu, 11 Jul 2024 13:02:42 +0200
From: Peter Zijlstra
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 07/11] perf/uprobe: Split uprobe_unregister()
References: <20240711110235.098009979@infradead.org>

With uprobe_unregister() having grown a synchronize_srcu(), it becomes fairly slow to call. Esp. since both users of this API call it in a loop.

Peel off the sync_srcu() and do it once, after the loop.
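To illustrate the intended caller pattern (a sketch only; the variable names are illustrative, the concrete call sites are updated in the diff below):

	for (i = 0; i < cnt; i++)
		uprobe_unregister_nosync(inode, uprobes[i].offset, &uprobes[i].consumer);
	if (cnt)
		uprobe_unregister_sync();	/* one synchronize_srcu() for the whole batch */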
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko --- include/linux/uprobes.h | 8 ++++++-- kernel/events/uprobes.c | 8 ++++++-- kernel/trace/bpf_trace.c | 6 ++++-- kernel/trace/trace_uprobe.c | 6 +++++- 4 files changed, 21 insertions(+), 7 deletions(-) --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -113,7 +113,8 @@ extern int uprobe_write_opcode(struct ar extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern void uprobe_unregister_nosync(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern void uprobe_unregister_sync(void); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -163,7 +164,10 @@ uprobe_apply(struct inode *inode, loff_t return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +uprobe_unregister_nosync(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +{ +} +static inline void uprobes_unregister_sync(void) { } static inline int uprobe_mmap(struct vm_area_struct *vma) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1138,7 +1138,7 @@ __uprobe_unregister(struct uprobe *uprob * @offset: offset from the start of the file. * @uc: identify which probe if multiple probes are colocated. 
*/ -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +void uprobe_unregister_nosync(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; @@ -1152,10 +1152,14 @@ void uprobe_unregister(struct inode *ino raw_write_seqcount_end(&uprobe->register_seq); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); +} +EXPORT_SYMBOL_GPL(uprobe_unregister_nosync); +void uprobe_unregister_sync(void) +{ synchronize_srcu(&uprobes_srcu); } -EXPORT_SYMBOL_GPL(uprobe_unregister); +EXPORT_SYMBOL_GPL(uprobe_unregister_sync); /* * __uprobe_register - register a probe --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3181,9 +3181,11 @@ static void bpf_uprobe_unregister(struct u32 i; for (i = 0; i < cnt; i++) { - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, - &uprobes[i].consumer); + uprobe_unregister_nosync(d_real_inode(path->dentry), uprobes[i].offset, + &uprobes[i].consumer); } + if (cnt) + uprobe_unregister_sync(); } static void bpf_uprobe_multi_link_release(struct bpf_link *link) --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1104,6 +1104,7 @@ static int trace_uprobe_enable(struct tr static void __probe_event_disable(struct trace_probe *tp) { struct trace_uprobe *tu; + bool sync = false; tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); @@ -1112,9 +1113,12 @@ static void __probe_event_disable(struct if (!tu->inode) continue; - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); + uprobe_unregister_nosync(tu->inode, tu->offset, &tu->consumer); + sync = true; tu->inode = NULL; } + if (sync) + uprobe_unregister_sync(); } static int probe_event_enable(struct trace_event_call *call, From patchwork Thu Jul 11 11:02:43 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730365 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0B77115B0FA; Thu, 11 Jul 2024 11:07:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; cv=none; b=HoGLQUY/ZOXpPItxp5oY4/aJ0/LHw3O0iKluw1hAUbHhis9GW9Pm2fkpcgiJBrhCWG75hc7IC2Nlx1SS9yltm4MKckYEm62GDI0YhfUDnUMHYY1cx4UdEXNPTBBPRwikNtrYA4BLWiaGULNgoBgtllum/B+/gUd3xreDjDVYFh8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696050; c=relaxed/simple; bh=1M+rhtc/qaJHoSvWssThVK2Vgx9AVwkdXE4otbqqAQE=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=WGqnNLOklXS3K8H79DFORko4LCs/hsKN+JHeDgBO8lq6yT+cixY3/++FkxGTrWYwiZNIPv0UGCilmOwJVVq/gLhog70ocETaoyQ4h9/lhdewPuu5H95RT9mykU61PrrrfA2DUzMvRQ2MsYn8XOJMU89Xs0mlqTtri4USmEYRc3A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=opIittmj; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; 
dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="opIittmj" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=oSWYpy59uOkf0yV2H/rgoKV+1idp02ib0EduwVb/ADY=; b=opIittmjzxeKvzGOiAX757Yf4/ ZE0VyqFSma3GLdV27tVOFaD7BrmuNz4gBpqZfWrunSXcNuUUXkP1Di5f2ptkV1qAwFd5H9DFL7PEJ L2TS+FoAWabsx9MMJZQW1j0lezTIBPRy5H7jPmRHaFVzVO9FBL52TpNLo6Se8lWs/ZU/Ki/yNPfZJ G9w39uGMO9drhG7EJHIrVaZUdjHJkhEF0CF2rJwCAcAarpqN/L8Mm0kVFQzrNqes54lXKop55Y7j8 yghDfx2eH2E9gBLnysbMpSI41yN7p1UwXlYJkKU7w01lBU6jwvatPMo41cAa8uB2JCxEEA0POAd3t JMTxIhmg==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRre0-000000015VW-0J2D; Thu, 11 Jul 2024 11:07:24 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id D27A7302DCD; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110401.096506262@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:43 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH v2 08/11] perf/uprobe: Convert (some) uprobe->refcount to SRCU References: <20240711110235.098009979@infradead.org> Precedence: bulk X-Mailing-List: linux-trace-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 With handle_swbp() hitting concurrently on (all) CPUs, potentially on the same uprobe, the uprobe->refcount can get *very* hot. Move the struct uprobe lifetime into uprobes_srcu such that it covers both the uprobe and the uprobe->consumers list. With this, handle_swbp() can use a single large SRCU critical section to avoid taking a refcount on the uprobe for it's duration. Notably, the single-step and uretprobe paths need a reference that leaves handle_swbp() and will, for now, still use ->refcount. Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 68 ++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 27 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -51,7 +51,7 @@ static struct mutex uprobes_mmap_mutex[U DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); /* - * Covers uprobe->consumers lifetime. + * Covers uprobe->consumers lifetime as well as struct uprobe. 
*/ DEFINE_STATIC_SRCU(uprobes_srcu); @@ -626,7 +626,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - call_rcu(&uprobe->rcu, uprobe_free_rcu); + call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); } } @@ -678,7 +678,7 @@ static struct uprobe *__find_uprobe(stru struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key); if (node) - return try_get_uprobe(__node_2_uprobe(node)); + return __node_2_uprobe(node); return NULL; } @@ -691,7 +691,7 @@ static struct uprobe *find_uprobe(struct { unsigned int seq; - guard(rcu)(); + lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); do { seq = read_seqcount_begin(&uprobes_seqcount); @@ -1142,6 +1142,8 @@ void uprobe_unregister_nosync(struct ino { struct uprobe *uprobe; + guard(srcu)(&uprobes_srcu); + uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return; @@ -1151,7 +1153,6 @@ void uprobe_unregister_nosync(struct ino __uprobe_unregister(uprobe, uc); raw_write_seqcount_end(&uprobe->register_seq); up_write(&uprobe->register_rwsem); - put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister_nosync); @@ -1263,6 +1264,8 @@ int uprobe_apply(struct inode *inode, lo struct uprobe_consumer *con; int ret = -ENOENT; + guard(srcu)(&uprobes_srcu); + uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return ret; @@ -1275,7 +1278,6 @@ int uprobe_apply(struct inode *inode, lo ret = register_for_each_vma(uprobe, add ? uc : NULL); raw_write_seqcount_end(&uprobe->register_seq); up_write(&uprobe->register_rwsem); - put_uprobe(uprobe); return ret; } @@ -1929,10 +1931,14 @@ static void prepare_uretprobe(struct upr if (!ri) return; + ri->uprobe = try_get_uprobe(uprobe); + if (!ri->uprobe) + goto err_mem; + trampoline_vaddr = get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) - goto fail; + goto err_uprobe; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); @@ -1950,12 +1956,11 @@ static void prepare_uretprobe(struct upr * attack from user-space. 
*/ uprobe_warn(current, "handle tail call"); - goto fail; + goto err_uprobe; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } - ri->uprobe = get_uprobe(uprobe); ri->func = instruction_pointer(regs); ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; @@ -1966,7 +1971,10 @@ static void prepare_uretprobe(struct upr utask->return_instances = ri; return; -fail: + +err_uprobe: + uprobe_put(ri->uprobe); +err_mem: kfree(ri); } @@ -1982,22 +1990,31 @@ pre_ssout(struct uprobe *uprobe, struct if (!utask) return -ENOMEM; + utask->active_uprobe = try_get_uprobe(uprobe); + if (!utask->active_uprobe) + return -ESRCH; + xol_vaddr = xol_get_insn_slot(uprobe); - if (!xol_vaddr) - return -ENOMEM; + if (!xol_vaddr) { + err = -ENOMEM; + goto err_uprobe; + } utask->xol_vaddr = xol_vaddr; utask->vaddr = bp_vaddr; err = arch_uprobe_pre_xol(&uprobe->arch, regs); - if (unlikely(err)) { - xol_free_insn_slot(current); - return err; - } + if (unlikely(err)) + goto err_xol; - utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return 0; + +err_xol: + xol_free_insn_slot(current); +err_uprobe: + put_uprobe(utask->active_uprobe); + return err; } /* @@ -2128,7 +2145,7 @@ static void handler_chain(struct uprobe bool had_handler = false; unsigned int seq; - guard(srcu)(&uprobes_srcu); + lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); seq = raw_read_seqcount_begin(&uprobe->register_seq); @@ -2276,6 +2293,8 @@ static void handle_swbp(struct pt_regs * if (bp_vaddr == get_trampoline_vaddr()) return handle_trampoline(regs); + guard(srcu)(&uprobes_srcu); + uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { @@ -2304,7 +2323,7 @@ static void handle_swbp(struct pt_regs * * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) - goto out; + return; /* * Pairs with the smp_wmb() in prepare_uprobe(). 
@@ -2317,22 +2336,17 @@ static void handle_swbp(struct pt_regs * /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) - goto out; + return; if (arch_uprobe_ignore(&uprobe->arch, regs)) - goto out; + return; handler_chain(uprobe, regs); if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) - goto out; - - if (!pre_ssout(uprobe, regs, bp_vaddr)) return; - /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ -out: - put_uprobe(uprobe); + pre_ssout(uprobe, regs, bp_vaddr); } /* From patchwork Thu Jul 11 11:02:44 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Zijlstra X-Patchwork-Id: 13730360 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0AEF915A86E; Thu, 11 Jul 2024 11:07:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696049; cv=none; b=P+6Y/gAoLuoh0gLthBcoZAh9WzjEyKC5tIbdJBSmVD4OKufkRvzBFeq85zvhVcFTjT1RJzGEllsENREVshZAlhfUN3a5mXIhwGW5eRkcBRrpq/Bc7fTevbtAy6H3Ie/GvmtaqHUiltsqPK6UTcT3td8dt6wTsKVkLfONOENbJbg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720696049; c=relaxed/simple; bh=dTtktbIQzPN0M1WT/SSamB3BTYm0QnUYyOtyVkFgo2s=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=oTtAnyZpsFrn+uKj0v6KX1/YKuuoKp/RqRSHQ1G6HrwtE+NRgPwoQHRU3NNm2UWJpOVCO5yHX6lnHBoeFpB6kgZB/DUxXnyu1MmN3KUKGG4IIOhnznX2BbZ4HuyMgxBrI0XKYkpfYRvvit/i2p7vX2cEmwGwfYIDx7dD/6gUO3I= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=RndYImWa; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="RndYImWa" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=epm86oBKZwoCXF9Cm3n9QKwfFnScPDeksYSx49nPLjc=; b=RndYImWaa5OLJ4qUo6PNRFfl0z asshekHfKR47hhffhSeHfY5CEqgsE/mJ6w+CEvXqNBWOkAmM3LHGQRUhnW9WQPGVnBQTBHhbqFGYW 40meY4NDRbKM2OzMdXpu7qz8tZ2/aXJprbc8MhjmrAsz1SNf5N5p/AmIYaXYY8WfNm9vJOu4bQYf8 c+XRjXRxpRAM8nXEMZ0txOngr8EJ9I3PhOeMN1ZpyaNV+IPTtXNjLZjCmj+wWxNxcfqZScjISHFCj xUfIJuhC//fItau7KRqAQua0JbF6uN8OzuQkW5sRYKwjT8XiTD1erOaG+fc8dVxkvm0EHC71eRzGA cSQgylpQ==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sRre0-0000000Ax6Z-0Gxz; Thu, 11 Jul 2024 11:07:24 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id D65EA302EAB; Thu, 11 Jul 2024 13:07:22 +0200 (CEST) Message-Id: <20240711110401.203782356@infradead.org> User-Agent: quilt/0.65 Date: Thu, 11 Jul 2024 13:02:44 +0200 From: Peter Zijlstra To: 
From patchwork Thu Jul 11 11:02:44 2024
X-Patchwork-Submitter: Peter Zijlstra
X-Patchwork-Id: 13730360
Message-Id: <20240711110401.203782356@infradead.org>
Date: Thu, 11 Jul 2024 13:02:44 +0200
From: Peter Zijlstra
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
    peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org,
    jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 09/11] srcu: Add __srcu_clone_read_lock()
References: <20240711110235.098009979@infradead.org>

In order to support carrying an srcu_read_lock() section across fork,
where both the parent and the child process will do an
srcu_read_unlock(), the extra decrement needs to be accounted for with
an extra increment at fork time.

Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/srcu.h     |    1 +
 include/linux/srcutiny.h |   10 ++++++++++
 kernel/rcu/srcutree.c    |    5 +++++
 3 files changed, 16 insertions(+)

--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -55,6 +55,7 @@ void call_srcu(struct srcu_struct *ssp,
 		void (*func)(struct rcu_head *head));
 void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
 void synchronize_srcu(struct srcu_struct *ssp);
 unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -71,6 +71,16 @@ static inline int __srcu_read_lock(struc
 	return idx;
 }

+static inline void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx)
+{
+	int newval;
+
+	preempt_disable();  // Needed for PREEMPT_AUTO
+	newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1;
+	WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
+	preempt_enable();
+}
+
 static inline void synchronize_srcu_expedited(struct srcu_struct *ssp)
 {
 	synchronize_srcu(ssp);
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -720,6 +720,11 @@ int __srcu_read_lock(struct srcu_struct
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);

+void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx)
+{
+	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
+}
+
 /*
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct. Note that this may well be a different
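For illustration only, here is the usage pattern the new helper enables, as described in the changelog above: a task holds an SRCU read lock and remembers the index; at fork time the duplicated state "clones" the read lock on the same index so that parent and child can each do one srcu_read_unlock(). The helper itself is from the patch; example_srcu, struct example_state and the example_* functions are invented for this sketch.

/* Sketch only: balancing the child's future srcu_read_unlock() at fork. */
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);

struct example_state {
	int srcu_idx;
};

static void example_enter(struct example_state *st)
{
	st->srcu_idx = srcu_read_lock(&example_srcu);
}

/* Called on fork: the child inherits a copy of the parent's state. */
static void example_dup(struct example_state *child,
			const struct example_state *parent)
{
	*child = *parent;
	/* Account for the extra srcu_read_unlock() the child will do. */
	__srcu_clone_read_lock(&example_srcu, child->srcu_idx);
}

static void example_exit(struct example_state *st)
{
	srcu_read_unlock(&example_srcu, st->srcu_idx);
}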
From patchwork Thu Jul 11 11:02:45 2024
X-Patchwork-Submitter: Peter Zijlstra
X-Patchwork-Id: 13730370
Message-Id: <20240711110401.311168524@infradead.org>
Date: Thu, 11 Jul 2024 13:02:45 +0200
From: Peter Zijlstra
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
    peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org,
    jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 10/11] perf/uprobe: Convert single-step and uretprobe to SRCU
References: <20240711110235.098009979@infradead.org>
Both single-step and uretprobes take a refcount on struct uprobe in
handle_swbp() in order to ensure that struct uprobe stays extant until
the next trap. Since uprobe_unregister() only cares about the
uprobe_consumer lifetime, and these intra-trap sections can be
arbitrarily large, create a second SRCU domain to cover these.

Notably, a uretprobe with a registered return_instance that never
triggers -- because userspace -- will currently pin the return_instance
and related uprobe until the task dies. With this conversion to SRCU,
this behaviour would inhibit freeing of all uprobes.

Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/uprobes.h |    2 +
 kernel/events/uprobes.c |   60 +++++++++++++++++++++++-------------------------
 2 files changed, 31 insertions(+), 31 deletions(-)

--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -78,6 +78,7 @@ struct uprobe_task {

 	struct return_instance		*return_instances;
 	unsigned int			depth;
+	unsigned int			active_srcu_idx;
 };

 struct return_instance {
@@ -86,6 +87,7 @@ struct return_instance {
 	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
 	bool			chained;	/* true, if instance is nested */
+	int			srcu_idx;

 	struct return_instance	*next;		/* keep as stack */
 };
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -54,6 +54,15 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem)
  * Covers uprobe->consumers lifetime as well as struct uprobe.
  */
DEFINE_STATIC_SRCU(uprobes_srcu);
+/*
+ * Covers return_instance->uprobe and utask->active_uprobe. Separate from
+ * uprobe_srcu because uprobe_unregister() doesn't need to wait for this
+ * and these lifetimes can be fairly long.
+ *
+ * Notably, these sections span userspace and as such use
+ * __srcu_read_{,un}lock() to elide lockdep.
+ */
+DEFINE_STATIC_SRCU(uretprobes_srcu);

 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0

@@ -596,25 +605,24 @@ set_orig_insn(struct arch_uprobe *auprob
 			*(uprobe_opcode_t *)&auprobe->insn);
 }

-static struct uprobe *try_get_uprobe(struct uprobe *uprobe)
-{
-	if (refcount_inc_not_zero(&uprobe->ref))
-		return uprobe;
-	return NULL;
-}
-
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
 	refcount_inc(&uprobe->ref);
 	return uprobe;
 }

-static void uprobe_free_rcu(struct rcu_head *rcu)
+static void uprobe_free_stage2(struct rcu_head *rcu)
 {
 	struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
 	kfree(uprobe);
 }

+static void uprobe_free_stage1(struct rcu_head *rcu)
+{
+	struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
+	call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_stage2);
+}
+
 static void put_uprobe(struct uprobe *uprobe)
 {
 	if (refcount_dec_and_test(&uprobe->ref)) {
@@ -626,7 +634,7 @@ static void put_uprobe(struct uprobe *up
 		mutex_lock(&delayed_uprobe_lock);
 		delayed_uprobe_remove(uprobe, NULL);
 		mutex_unlock(&delayed_uprobe_lock);
-		call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
+		call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_stage1);
 	}
 }

@@ -1753,7 +1761,7 @@ unsigned long uprobe_get_trap_addr(struc
 static struct return_instance *free_ret_instance(struct return_instance *ri)
 {
 	struct return_instance *next = ri->next;
-	put_uprobe(ri->uprobe);
+	__srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx);
 	kfree(ri);
 	return next;
 }
@@ -1771,7 +1779,7 @@ void uprobe_free_utask(struct task_struc
 		return;

 	if (utask->active_uprobe)
-		put_uprobe(utask->active_uprobe);
+		__srcu_read_unlock(&uretprobes_srcu, utask->active_srcu_idx);

 	ri = utask->return_instances;
 	while (ri)
@@ -1814,7 +1822,7 @@ static int dup_utask(struct task_struct
 			return -ENOMEM;

 		*n = *o;
-		get_uprobe(n->uprobe);
+		__srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx);
 		n->next = NULL;

 		*p = n;
@@ -1931,14 +1939,10 @@ static void prepare_uretprobe(struct upr
 	if (!ri)
 		return;

-	ri->uprobe = try_get_uprobe(uprobe);
-	if (!ri->uprobe)
-		goto err_mem;
-
 	trampoline_vaddr = get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
 	if (orig_ret_vaddr == -1)
-		goto err_uprobe;
+		goto err_mem;

 	/* drop the entries invalidated by longjmp() */
 	chained = (orig_ret_vaddr == trampoline_vaddr);
@@ -1956,11 +1960,13 @@ static void prepare_uretprobe(struct upr
 			 * attack from user-space.
 			 */
 			uprobe_warn(current, "handle tail call");
-			goto err_uprobe;
+			goto err_mem;
 		}
 		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
 	}

+	ri->srcu_idx = __srcu_read_lock(&uretprobes_srcu);
+	ri->uprobe = uprobe;
 	ri->func = instruction_pointer(regs);
 	ri->stack = user_stack_pointer(regs);
 	ri->orig_ret_vaddr = orig_ret_vaddr;
@@ -1972,8 +1978,6 @@ static void prepare_uretprobe(struct upr

 	return;

-err_uprobe:
-	uprobe_put(ri->uprobe);
 err_mem:
 	kfree(ri);
 }
@@ -1990,15 +1994,9 @@ pre_ssout(struct uprobe *uprobe, struct
 	if (!utask)
 		return -ENOMEM;

-	utask->active_uprobe = try_get_uprobe(uprobe);
-	if (!utask->active_uprobe)
-		return -ESRCH;
-
 	xol_vaddr = xol_get_insn_slot(uprobe);
-	if (!xol_vaddr) {
-		err = -ENOMEM;
-		goto err_uprobe;
-	}
+	if (!xol_vaddr)
+		return -ENOMEM;

 	utask->xol_vaddr = xol_vaddr;
 	utask->vaddr = bp_vaddr;
@@ -2007,13 +2005,13 @@ pre_ssout(struct uprobe *uprobe, struct
 	if (unlikely(err))
 		goto err_xol;

+	utask->active_srcu_idx = __srcu_read_lock(&uretprobes_srcu);
+	utask->active_uprobe = uprobe;
 	utask->state = UTASK_SSTEP;
 	return 0;

 err_xol:
 	xol_free_insn_slot(current);
-err_uprobe:
-	put_uprobe(utask->active_uprobe);
 	return err;
 }

@@ -2366,7 +2364,7 @@ static void handle_singlestep(struct upr
 	else
 		WARN_ON_ONCE(1);

-	put_uprobe(uprobe);
+	__srcu_read_unlock(&uretprobes_srcu, utask->active_srcu_idx);
 	utask->active_uprobe = NULL;
 	utask->state = UTASK_RUNNING;
 	xol_free_insn_slot(current);
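For readers following the lifetime argument in the patch above, here is a minimal, self-contained sketch of the two-stage freeing scheme it sets up: an object may be referenced under either of two SRCU domains, so the final kfree() is chained through both grace periods. Stage 1 runs once the first domain's readers are done and queues stage 2 on the second domain. The names (domain_a, domain_b, struct example_obj, example_*) are invented for this sketch; only the SRCU API is real.

/* Sketch only: chaining call_srcu() across two SRCU domains before kfree(). */
#include <linux/slab.h>
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(domain_a);
DEFINE_STATIC_SRCU(domain_b);

struct example_obj {
	struct rcu_head rcu;
	/* payload ... */
};

static void example_free_stage2(struct rcu_head *rcu)
{
	/* domain_b readers are done as well; safe to free. */
	kfree(container_of(rcu, struct example_obj, rcu));
}

static void example_free_stage1(struct rcu_head *rcu)
{
	struct example_obj *obj = container_of(rcu, struct example_obj, rcu);

	/* domain_a readers are done; the rcu_head can be reused for domain_b. */
	call_srcu(&domain_b, &obj->rcu, example_free_stage2);
}

static void example_release(struct example_obj *obj)
{
	call_srcu(&domain_a, &obj->rcu, example_free_stage1);
}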
From patchwork Thu Jul 11 11:02:46 2024
X-Patchwork-Submitter: Peter Zijlstra
X-Patchwork-Id: 13730366
Message-Id: <20240711110401.412779774@infradead.org>
Date: Thu, 11 Jul 2024 13:02:46 +0200
From: Peter Zijlstra
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
    peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org,
    jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 11/11] perf/uprobe: Add uretprobe timer
References: <20240711110235.098009979@infradead.org>

In order to put a bound on the uretprobes_srcu critical section, add a
timer to uprobe_task. Every time an RI is added or removed, the timer is
pushed forward to now + 1s. If the timer were ever to fire, it would
convert the SRCU 'reference' to a refcount reference, if possible.

Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/uprobes.h |    8 +++++
 kernel/events/uprobes.c |   67 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 69 insertions(+), 6 deletions(-)

--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include

 struct vm_area_struct;
 struct mm_struct;
@@ -79,6 +80,10 @@ struct uprobe_task {
 	struct return_instance		*return_instances;
 	unsigned int			depth;
 	unsigned int			active_srcu_idx;
+
+	struct timer_list		ri_timer;
+	struct callback_head		ri_task_work;
+	struct task_struct		*task;
 };

 struct return_instance {
@@ -86,7 +91,8 @@ struct return_instance {
 	unsigned long		func;
 	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
-	bool			chained;	/* true, if instance is nested */
+	u8			chained;	/* true, if instance is nested */
+	u8			has_ref;
 	int			srcu_idx;

 	struct return_instance	*next;		/* keep as stack */
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1761,7 +1761,12 @@ unsigned long uprobe_get_trap_addr(struc
 static struct return_instance *free_ret_instance(struct return_instance *ri)
 {
 	struct return_instance *next = ri->next;
-	__srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx);
+	if (ri->uprobe) {
+		if (ri->has_ref)
+			put_uprobe(ri->uprobe);
+		else
+			__srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx);
+	}
 	kfree(ri);
 	return next;
 }
@@ -1785,11 +1790,48 @@ void uprobe_free_utask(struct task_struc
 	while (ri)
 		ri = free_ret_instance(ri);

+	timer_delete_sync(&utask->ri_timer);
+	task_work_cancel(utask->task, &utask->ri_task_work);
 	xol_free_insn_slot(t);
 	kfree(utask);
 	t->utask = NULL;
 }

+static void return_instance_task_work(struct callback_head *head)
+{
+	struct uprobe_task *utask = container_of(head, struct uprobe_task, ri_task_work);
+	struct return_instance *ri;
+
+	for (ri = utask->return_instances; ri; ri = ri->next) {
+		if (!ri->uprobe)
+			continue;
+		if (ri->has_ref)
+			continue;
+		if (refcount_inc_not_zero(&ri->uprobe->ref))
+			ri->has_ref = true;
+		else
+			ri->uprobe = NULL;
+		__srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx);
+	}
+}
+
+static void return_instance_timer(struct timer_list *timer)
+{
+	struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
+	task_work_add(utask->task, &utask->ri_task_work, TWA_SIGNAL);
+}
+
+static struct uprobe_task *alloc_utask(struct task_struct *task)
+{
+	struct uprobe_task *utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!utask)
+		return NULL;
+	timer_setup(&utask->ri_timer, return_instance_timer, 0);
+	init_task_work(&utask->ri_task_work, return_instance_task_work);
+	utask->task = task;
+	return utask;
+}
+
 /*
  * Allocate a uprobe_task object for the task if necessary.
  * Called when the thread hits a breakpoint.
@@ -1801,7 +1843,7 @@ void uprobe_free_utask(struct task_struc
 static struct uprobe_task *get_utask(void)
 {
 	if (!current->utask)
-		current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+		current->utask = alloc_utask(current);
 	return current->utask;
 }

@@ -1810,7 +1852,7 @@ static int dup_utask(struct task_struct
 	struct uprobe_task *n_utask;
 	struct return_instance **p, *o, *n;

-	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	n_utask = alloc_utask(t);
 	if (!n_utask)
 		return -ENOMEM;
 	t->utask = n_utask;
@@ -1822,13 +1864,20 @@ static int dup_utask(struct task_struct
 			return -ENOMEM;

 		*n = *o;
-		__srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx);
+		if (n->uprobe) {
+			if (n->has_ref)
+				get_uprobe(n->uprobe);
+			else
+				__srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx);
+		}
 		n->next = NULL;

 		*p = n;
 		p = &n->next;
 		n_utask->depth++;
 	}
+	if (n_utask->return_instances)
+		mod_timer(&n_utask->ri_timer, jiffies + HZ);

 	return 0;
 }
@@ -1967,6 +2016,7 @@ static void prepare_uretprobe(struct upr

 	ri->srcu_idx = __srcu_read_lock(&uretprobes_srcu);
 	ri->uprobe = uprobe;
+	ri->has_ref = 0;
 	ri->func = instruction_pointer(regs);
 	ri->stack = user_stack_pointer(regs);
 	ri->orig_ret_vaddr = orig_ret_vaddr;
@@ -1976,6 +2026,8 @@ static void prepare_uretprobe(struct upr
 	ri->next = utask->return_instances;
 	utask->return_instances = ri;

+	mod_timer(&utask->ri_timer, jiffies + HZ);
+
 	return;

 err_mem:
@@ -2204,6 +2256,9 @@ handle_uretprobe_chain(struct return_ins
 	struct uprobe *uprobe = ri->uprobe;
 	struct uprobe_consumer *uc;

+	if (!uprobe)
+		return;
+
 	guard(srcu)(&uprobes_srcu);

 	for_each_consumer_rcu(uc, uprobe->consumers) {
@@ -2250,8 +2305,10 @@ static void handle_trampoline(struct pt_
 	instruction_pointer_set(regs, ri->orig_ret_vaddr);

 	do {
-		if (valid)
+		if (valid) {
 			handle_uretprobe_chain(ri, regs);
+			mod_timer(&utask->ri_timer, jiffies + HZ);
+		}
 		ri = free_ret_instance(ri);
 		utask->depth--;
 	} while (ri != next);
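To close out, an illustration of the bounding scheme this last patch describes: an object is held only via an SRCU read-side reference, and a timer pushed out to now + 1s arms a task_work that, if the section lives that long, upgrades the SRCU reference to a real refcount and drops the SRCU lock so grace periods can complete. This is a hedged sketch of the idea, not the series' code; example_srcu, struct example_obj, struct example_ctx and the example_* functions are all invented here.

/* Sketch only: timer + task_work upgrade of an SRCU reference to a refcount. */
#include <linux/jiffies.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/srcu.h>
#include <linux/task_work.h>
#include <linux/timer.h>

DEFINE_STATIC_SRCU(example_srcu);

struct example_obj {
	refcount_t ref;
};

struct example_ctx {
	struct example_obj	*obj;
	int			srcu_idx;
	bool			has_ref;
	struct timer_list	timer;
	struct callback_head	twork;
	struct task_struct	*task;
};

static void example_upgrade(struct callback_head *head)
{
	struct example_ctx *ctx = container_of(head, struct example_ctx, twork);

	if (!ctx->obj || ctx->has_ref)
		return;

	/* Try to pin the object for real, then leave the SRCU section. */
	if (refcount_inc_not_zero(&ctx->obj->ref))
		ctx->has_ref = true;
	else
		ctx->obj = NULL;	/* object is already on its way out */
	srcu_read_unlock(&example_srcu, ctx->srcu_idx);
}

static void example_timer_fn(struct timer_list *t)
{
	struct example_ctx *ctx = container_of(t, struct example_ctx, timer);

	/* Timer context cannot sleep; punt the upgrade to task context. */
	task_work_add(ctx->task, &ctx->twork, TWA_SIGNAL);
}

static void example_track(struct example_ctx *ctx, struct example_obj *obj)
{
	ctx->obj = obj;
	ctx->has_ref = false;
	ctx->srcu_idx = srcu_read_lock(&example_srcu);
	ctx->task = current;
	timer_setup(&ctx->timer, example_timer_fn, 0);
	init_task_work(&ctx->twork, example_upgrade);
	mod_timer(&ctx->timer, jiffies + HZ);	/* push the bound out by one second */
}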