diff mbox series

[net-next,v3,1/2] mctp: make __mctp_dev_get() take a refcount hold

Message ID 20220222041739.511255-2-matt@codeconstruct.com.au (mailing list archive)
State Accepted
Commit dc121c0084910db985cf1c8ba6fce5d8c307cc02
Delegated to: Netdev Maintainers
Headers show
Series mctp: Fix incorrect refs for extended addr | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 84 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Matt Johnston Feb. 22, 2022, 4:17 a.m. UTC
Previously there was a race that could allow the mctp_dev refcount
to hit zero:

rcu_read_lock();
mdev = __mctp_dev_get(dev);
// mctp_unregister() happens here, mdev->refs hits zero
mctp_dev_hold(mdev);
rcu_read_unlock();

Now we make __mctp_dev_get() take the hold itself. It is safe to test
against the zero refcount because __mctp_dev_get() is called holding
rcu_read_lock and mctp_dev uses kfree_rcu().

Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
 net/mctp/device.c     | 21 ++++++++++++++++++---
 net/mctp/route.c      |  5 ++++-
 net/mctp/test/utils.c |  1 -
 3 files changed, 22 insertions(+), 5 deletions(-)

Comments

Jakub Kicinski Feb. 23, 2022, 3:59 a.m. UTC | #1
On Tue, 22 Feb 2022 12:17:38 +0800 Matt Johnston wrote:
> Previously there was a race that could allow the mctp_dev refcount
> to hit zero:
> 
> rcu_read_lock();
> mdev = __mctp_dev_get(dev);
> // mctp_unregister() happens here, mdev->refs hits zero
> mctp_dev_hold(mdev);
> rcu_read_unlock();
> 
> Now we make __mctp_dev_get() take the hold itself. It is safe to test
> against the zero refcount because __mctp_dev_get() is called holding
> rcu_read_lock and mctp_dev uses kfree_rcu().

Jeremy, did you have any specific semantics or naming scheme in mind
here? PTAL. Is it better to make __mctp_dev_get() "safe" or create
mctp_dev_get()? etc
Jeremy Kerr Feb. 23, 2022, 4:22 a.m. UTC | #2
Hi Jakub,

> Jeremy, did you have any specific semantics or naming scheme in mind
> here? PTAL. Is it better to make __mctp_dev_get() "safe" or create
> mctp_dev_get()? etc

The __ prefix is (was?) more about the requirement for the RCU read lock
there. That's still the case, so the __ may still be applicable.

We only have one non-test usage of a contender for a RCU-locked
mctp_dev_get(), ie, currently:

  rcu_read_lock();
  dev = __mctp_dev_get();
  rcu_read_unlock();

 - so I'm not sure it's worthwhile adding a separate function for that
at present, and I'm OK with this patch retaining the __.

I guess the question is really: as per existing conventions, does __
more imply an unlocked accessor, or a non-reference-counting accessor?

Cheers,


Jeremy
diff mbox series

Patch

diff --git a/net/mctp/device.c b/net/mctp/device.c
index 9e097e61f23a..b754c31162b1 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -25,12 +25,25 @@  struct mctp_dump_cb {
 	size_t a_idx;
 };
 
-/* unlocked: caller must hold rcu_read_lock */
+/* unlocked: caller must hold rcu_read_lock.
+ * Returned mctp_dev has its refcount incremented, or NULL if unset.
+ */
 struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
 {
-	return rcu_dereference(dev->mctp_ptr);
+	struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr);
+
+	/* RCU guarantees that any mdev is still live.
+	 * Zero refcount implies a pending free, return NULL.
+	 */
+	if (mdev)
+		if (!refcount_inc_not_zero(&mdev->refs))
+			return NULL;
+	return mdev;
 }
 
+/* Returned mctp_dev does not have refcount incremented. The returned pointer
+ * remains live while rtnl_lock is held, as that prevents mctp_unregister()
+ */
 struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
 {
 	return rtnl_dereference(dev->mctp_ptr);
@@ -124,6 +137,7 @@  static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
 				if (mdev) {
 					rc = mctp_dump_dev_addrinfo(mdev,
 								    skb, cb);
+					mctp_dev_put(mdev);
 					// Error indicates full buffer, this
 					// callback will get retried.
 					if (rc < 0)
@@ -298,7 +312,7 @@  void mctp_dev_hold(struct mctp_dev *mdev)
 
 void mctp_dev_put(struct mctp_dev *mdev)
 {
-	if (refcount_dec_and_test(&mdev->refs)) {
+	if (mdev && refcount_dec_and_test(&mdev->refs)) {
 		dev_put(mdev->dev);
 		kfree_rcu(mdev, rcu);
 	}
@@ -370,6 +384,7 @@  static size_t mctp_get_link_af_size(const struct net_device *dev,
 	if (!mdev)
 		return 0;
 	ret = nla_total_size(4); /* IFLA_MCTP_NET */
+	mctp_dev_put(mdev);
 	return ret;
 }
 
diff --git a/net/mctp/route.c b/net/mctp/route.c
index fe6c8bf1ec2c..6f277e56b168 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -836,7 +836,7 @@  int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 {
 	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
 	struct mctp_skb_cb *cb = mctp_cb(skb);
-	struct mctp_route tmp_rt;
+	struct mctp_route tmp_rt = {0};
 	struct mctp_sk_key *key;
 	struct net_device *dev;
 	struct mctp_hdr *hdr;
@@ -948,6 +948,7 @@  int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 		mctp_route_release(rt);
 
 	dev_put(dev);
+	mctp_dev_put(tmp_rt.dev);
 
 	return rc;
 
@@ -1124,11 +1125,13 @@  static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
 
 	rt->output(rt, skb);
 	mctp_route_release(rt);
+	mctp_dev_put(mdev);
 
 	return NET_RX_SUCCESS;
 
 err_drop:
 	kfree_skb(skb);
+	mctp_dev_put(mdev);
 	return NET_RX_DROP;
 }
 
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index 7b7918702592..e03ba66bbe18 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -54,7 +54,6 @@  struct mctp_test_dev *mctp_test_create_dev(void)
 
 	rcu_read_lock();
 	dev->mdev = __mctp_dev_get(ndev);
-	mctp_dev_hold(dev->mdev);
 	rcu_read_unlock();
 
 	return dev;