diff mbox series

[RFC,net-next,05/10] devlink: remove the registration guarantee of references

Message ID 20221217011953.152487-6-kuba@kernel.org (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series devlink: remove the wait-for-references on unregister | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next

Commit Message

Jakub Kicinski Dec. 17, 2022, 1:19 a.m. UTC
The objective of exposing the devlink instance locks to
drivers was to let them use these locks to prevent user space
from accessing the device before it's fully initialized.
This is difficult because devlink_unregister() waits for all
references to be released, meaning that devlink_unregister()
can't itself be called under the instance lock.

To avoid this issue devlink_register() was moved after subobject
registration a while ago. Unfortunately the netdev paths get
a hold of the devlink instances _before_ they are registered.
Ideally netdev should wait for devlink init to finish (synchronizing
on the instance lock). This can't work because we don't know if the
instance will _ever_ be registered (in case of failures it may not).
The other option of returning an error until devlink_register()
is called is unappealing (user space would get a notification
netdev exist but would have to wait arbitrary amount of time
before accessing some of its attributes).

Weaken the guarantees of the devlink references.

Holding a reference will now only guarantee that the memory
of the object is around. Another way of looking at it is that
the reference now protects the object not its "registered" status.
Use devlink instance lock to synchronize unregistration.

This implies that releasing of the "main" reference of the devlink
instance moves from devlink_unregister() to devlink_free().

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h       |  2 ++
 net/devlink/core.c          | 64 ++++++++++++++++---------------------
 net/devlink/devl_internal.h |  2 --
 3 files changed, 30 insertions(+), 38 deletions(-)

Comments

Keller, Jacob E Dec. 19, 2022, 5:56 p.m. UTC | #1
On 12/16/2022 5:19 PM, Jakub Kicinski wrote:
> The objective of exposing the devlink instance locks to
> drivers was to let them use these locks to prevent user space
> from accessing the device before it's fully initialized.
> This is difficult because devlink_unregister() waits for all
> references to be released, meaning that devlink_unregister()
> can't itself be called under the instance lock.
> 

Sure.

> To avoid this issue devlink_register() was moved after subobject
> registration a while ago. Unfortunately the netdev paths get
> a hold of the devlink instances _before_ they are registered.
> Ideally netdev should wait for devlink init to finish (synchronizing
> on the instance lock). This can't work because we don't know if the
> instance will _ever_ be registered (in case of failures it may not).
> The other option of returning an error until devlink_register()
> is called is unappealing (user space would get a notification
> netdev exist but would have to wait arbitrary amount of time
> before accessing some of its attributes).
> 

Nice summary of the problems and options that we have tried already.

I think its also important as this can allow sub objects to be
registered after the devlink instance?

> Weaken the guarantees of the devlink references.
> 
> Holding a reference will now only guarantee that the memory
> of the object is around. Another way of looking at it is that
> the reference now protects the object not its "registered" status.
> Use devlink instance lock to synchronize unregistration.
> 

Right, this makes sense.

> This implies that releasing of the "main" reference of the devlink
> instance moves from devlink_unregister() to devlink_free().
> 

This makes sense and I think aligns more with how most references work
in practice. Good.

> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Code change seems straight forward enough. I had a minor question, but:

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

> ---
>  include/net/devlink.h       |  2 ++
>  net/devlink/core.c          | 64 ++++++++++++++++---------------------
>  net/devlink/devl_internal.h |  2 --
>  3 files changed, 30 insertions(+), 38 deletions(-)
> 
> diff --git a/include/net/devlink.h b/include/net/devlink.h
> index 36e013d3aa52..cc910612b3f4 100644
> --- a/include/net/devlink.h
> +++ b/include/net/devlink.h
> @@ -1648,6 +1648,8 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
>  	return devlink_alloc_ns(ops, priv_size, &init_net, dev);
>  }
>  void devlink_set_features(struct devlink *devlink, u64 features);
> +int devl_register(struct devlink *devlink);
> +void devl_unregister(struct devlink *devlink);
>  void devlink_register(struct devlink *devlink);
>  void devlink_unregister(struct devlink *devlink);
>  void devlink_free(struct devlink *devlink);
> diff --git a/net/devlink/core.c b/net/devlink/core.c
> index 2abad8247597..413b92534ad6 100644
> --- a/net/devlink/core.c
> +++ b/net/devlink/core.c
> @@ -89,21 +89,10 @@ struct devlink *__must_check devlink_try_get(struct devlink *devlink)
>  	return NULL;
>  }
>  
> -static void __devlink_put_rcu(struct rcu_head *head)
> -{
> -	struct devlink *devlink = container_of(head, struct devlink, rcu);
> -
> -	complete(&devlink->comp);
> -}
> -
>  void devlink_put(struct devlink *devlink)
>  {
>  	if (refcount_dec_and_test(&devlink->refcount))
> -		/* Make sure unregister operation that may await the completion
> -		 * is unblocked only after all users are after the end of
> -		 * RCU grace period.
> -		 */
> -		call_rcu(&devlink->rcu, __devlink_put_rcu);
> +		kfree_rcu(devlink, rcu);
>  }
>  
>  struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
> @@ -116,13 +105,6 @@ struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
>  	if (!devlink)
>  		goto unlock;
>  
> -	/* In case devlink_unregister() was already called and "unregistering"
> -	 * mark was set, do not allow to get a devlink reference here.
> -	 * This prevents live-lock of devlink_unregister() wait for completion.
> -	 */
> -	if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
> -		goto next;
> -
>  	if (!devlink_try_get(devlink))
>  		goto next;
>  	if (!net_eq(devlink_net(devlink), net)) {
> @@ -158,37 +140,48 @@ void devlink_set_features(struct devlink *devlink, u64 features)
>  EXPORT_SYMBOL_GPL(devlink_set_features);
>  
>  /**
> - *	devlink_register - Register devlink instance
> - *
> - *	@devlink: devlink
> + * devl_register - Register devlink instance
> + * @devlink: devlink
>   */
> -void devlink_register(struct devlink *devlink)
> +int devl_register(struct devlink *devlink)
>  {
>  	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
> -	/* Make sure that we are in .probe() routine */
> +	devl_assert_locked(devlink);
>  
>  	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
>  	devlink_notify_register(devlink);
> +
> +	return 0;

Any particular reason to change this to int when it doesn't have a
failure case yet? Future patches I assume? You don't check the
devl_register return value.

> +}
> +EXPORT_SYMBOL_GPL(devl_register);
> +
> +void devlink_register(struct devlink *devlink)
> +{
> +	devl_lock(devlink);
> +	devl_register(devlink);
> +	devl_unlock(devlink);
>  }
>  EXPORT_SYMBOL_GPL(devlink_register);
>  
>  /**
> - *	devlink_unregister - Unregister devlink instance
> - *
> - *	@devlink: devlink
> + * devl_unregister - Unregister devlink instance
> + * @devlink: devlink
>   */
> -void devlink_unregister(struct devlink *devlink)
> +void devl_unregister(struct devlink *devlink)
>  {
>  	ASSERT_DEVLINK_REGISTERED(devlink);
> -	/* Make sure that we are in .remove() routine */
> -
> -	xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
> -	devlink_put(devlink);
> -	wait_for_completion(&devlink->comp);
> +	devl_assert_locked(devlink);
>  
>  	devlink_notify_unregister(devlink);
>  	xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
> -	xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
> +}
> +EXPORT_SYMBOL_GPL(devl_unregister);
> +
> +void devlink_unregister(struct devlink *devlink)
> +{
> +	devl_lock(devlink);
> +	devl_unregister(devlink);
> +	devl_unlock(devlink);
>  }
>  EXPORT_SYMBOL_GPL(devlink_unregister);
>  
> @@ -252,7 +245,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
>  	mutex_init(&devlink->reporters_lock);
>  	mutex_init(&devlink->linecards_lock);
>  	refcount_set(&devlink->refcount, 1);
> -	init_completion(&devlink->comp);
>  
>  	return devlink;
>  
> @@ -298,7 +290,7 @@ void devlink_free(struct devlink *devlink)
>  
>  	xa_erase(&devlinks, devlink->index);
>  
> -	kfree(devlink);
> +	devlink_put(devlink);
>  }
>  EXPORT_SYMBOL_GPL(devlink_free);
>  
> diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
> index c3977c69552a..7e77eebde3b9 100644
> --- a/net/devlink/devl_internal.h
> +++ b/net/devlink/devl_internal.h
> @@ -12,7 +12,6 @@
>  #include <net/net_namespace.h>
>  
>  #define DEVLINK_REGISTERED XA_MARK_1
> -#define DEVLINK_UNREGISTERING XA_MARK_2
>  
>  #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
>  	(__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
> @@ -52,7 +51,6 @@ struct devlink {
>  	struct lock_class_key lock_key;
>  	u8 reload_failed:1;
>  	refcount_t refcount;
> -	struct completion comp;
>  	struct rcu_head rcu;
>  	struct notifier_block netdevice_nb;
>  	char priv[] __aligned(NETDEV_ALIGN);
Jakub Kicinski Dec. 19, 2022, 10:02 p.m. UTC | #2
On Mon, 19 Dec 2022 09:56:26 -0800 Jacob Keller wrote:
> > -void devlink_register(struct devlink *devlink)
> > +int devl_register(struct devlink *devlink)
> >  {
> >  	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
> > -	/* Make sure that we are in .probe() routine */
> > +	devl_assert_locked(devlink);
> >  
> >  	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
> >  	devlink_notify_register(devlink);
> > +
> > +	return 0;  
> 
> Any particular reason to change this to int when it doesn't have a
> failure case yet? Future patches I assume? You don't check the
> devl_register return value.

I was wondering if anyone would notice :)

Returning errors from the registration helper seems natural,
and if we don't have this ability it may impact our ability
to extend the core in the long run.
I was against making core functions void in the first place.
It's a good opportunity to change back.
Keller, Jacob E Dec. 19, 2022, 10:14 p.m. UTC | #3
On 12/19/2022 2:02 PM, Jakub Kicinski wrote:
> On Mon, 19 Dec 2022 09:56:26 -0800 Jacob Keller wrote:
>>> -void devlink_register(struct devlink *devlink)
>>> +int devl_register(struct devlink *devlink)
>>>  {
>>>  	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
>>> -	/* Make sure that we are in .probe() routine */
>>> +	devl_assert_locked(devlink);
>>>  
>>>  	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
>>>  	devlink_notify_register(devlink);
>>> +
>>> +	return 0;  
>>
>> Any particular reason to change this to int when it doesn't have a
>> failure case yet? Future patches I assume? You don't check the
>> devl_register return value.
> 
> I was wondering if anyone would notice :)
> 

I'm fine with it, but I would expect that devlink_register would want to
report it at least?

> Returning errors from the registration helper seems natural,
> and if we don't have this ability it may impact our ability
> to extend the core in the long run.
> I was against making core functions void in the first place.
> It's a good opportunity to change back.

Sure. I think its better to be able to report an error but wanted to
make sure its actually caught or at least logged if it occurs.

We can ofcourse change the function templates again since we don't
really guarantee API stability across versions, but it is more work for
backporting in the future.
Jakub Kicinski Dec. 19, 2022, 10:31 p.m. UTC | #4
On Mon, 19 Dec 2022 14:14:18 -0800 Jacob Keller wrote:
> On 12/19/2022 2:02 PM, Jakub Kicinski wrote:
> > I was wondering if anyone would notice :)
> 
> I'm fine with it, but I would expect that devlink_register would want to
> report it at least?

New code should not use devlink_* functions, so probably not worth it.

> > Returning errors from the registration helper seems natural,
> > and if we don't have this ability it may impact our ability
> > to extend the core in the long run.
> > I was against making core functions void in the first place.
> > It's a good opportunity to change back.  
> 
> Sure. I think its better to be able to report an error but wanted to
> make sure its actually caught or at least logged if it occurs.
> 
> We can ofcourse change the function templates again since we don't
> really guarantee API stability across versions, but it is more work for
> backporting in the future.
Jiri Pirko Jan. 2, 2023, 2:18 p.m. UTC | #5
Mon, Dec 19, 2022 at 11:02:10PM CET, kuba@kernel.org wrote:
>On Mon, 19 Dec 2022 09:56:26 -0800 Jacob Keller wrote:
>> > -void devlink_register(struct devlink *devlink)
>> > +int devl_register(struct devlink *devlink)
>> >  {
>> >  	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
>> > -	/* Make sure that we are in .probe() routine */
>> > +	devl_assert_locked(devlink);
>> >  
>> >  	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
>> >  	devlink_notify_register(devlink);
>> > +
>> > +	return 0;  
>> 
>> Any particular reason to change this to int when it doesn't have a
>> failure case yet? Future patches I assume? You don't check the
>> devl_register return value.
>
>I was wondering if anyone would notice :)
>
>Returning errors from the registration helper seems natural,
>and if we don't have this ability it may impact our ability
>to extend the core in the long run.
>I was against making core functions void in the first place.
>It's a good opportunity to change back.

devlink_register originally returned int. Leon changed that as part of
his work. I believe I expressed my negative feelings about that back
then. Sigh.
Jiri Pirko Jan. 2, 2023, 2:32 p.m. UTC | #6
Sat, Dec 17, 2022 at 02:19:48AM CET, kuba@kernel.org wrote:
>

[...]


>Holding a reference will now only guarantee that the memory
>of the object is around. Another way of looking at it is that
>the reference now protects the object not its "registered" status.

This would help to understand what you are doing in patch 3. Perphaps it
would be fine to describe the goal in the cover letter?
Jakub Kicinski Jan. 2, 2023, 11:18 p.m. UTC | #7
On Mon, 2 Jan 2023 15:32:04 +0100 Jiri Pirko wrote:
> >Holding a reference will now only guarantee that the memory
> >of the object is around. Another way of looking at it is that
> >the reference now protects the object not its "registered" status.  
> 
> This would help to understand what you are doing in patch 3. Perphaps it
> would be fine to describe the goal in the cover letter?

Fair point, will do!
diff mbox series

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 36e013d3aa52..cc910612b3f4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1648,6 +1648,8 @@  static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
 	return devlink_alloc_ns(ops, priv_size, &init_net, dev);
 }
 void devlink_set_features(struct devlink *devlink, u64 features);
+int devl_register(struct devlink *devlink);
+void devl_unregister(struct devlink *devlink);
 void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 2abad8247597..413b92534ad6 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -89,21 +89,10 @@  struct devlink *__must_check devlink_try_get(struct devlink *devlink)
 	return NULL;
 }
 
-static void __devlink_put_rcu(struct rcu_head *head)
-{
-	struct devlink *devlink = container_of(head, struct devlink, rcu);
-
-	complete(&devlink->comp);
-}
-
 void devlink_put(struct devlink *devlink)
 {
 	if (refcount_dec_and_test(&devlink->refcount))
-		/* Make sure unregister operation that may await the completion
-		 * is unblocked only after all users are after the end of
-		 * RCU grace period.
-		 */
-		call_rcu(&devlink->rcu, __devlink_put_rcu);
+		kfree_rcu(devlink, rcu);
 }
 
 struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
@@ -116,13 +105,6 @@  struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
 	if (!devlink)
 		goto unlock;
 
-	/* In case devlink_unregister() was already called and "unregistering"
-	 * mark was set, do not allow to get a devlink reference here.
-	 * This prevents live-lock of devlink_unregister() wait for completion.
-	 */
-	if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
-		goto next;
-
 	if (!devlink_try_get(devlink))
 		goto next;
 	if (!net_eq(devlink_net(devlink), net)) {
@@ -158,37 +140,48 @@  void devlink_set_features(struct devlink *devlink, u64 features)
 EXPORT_SYMBOL_GPL(devlink_set_features);
 
 /**
- *	devlink_register - Register devlink instance
- *
- *	@devlink: devlink
+ * devl_register - Register devlink instance
+ * @devlink: devlink
  */
-void devlink_register(struct devlink *devlink)
+int devl_register(struct devlink *devlink)
 {
 	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-	/* Make sure that we are in .probe() routine */
+	devl_assert_locked(devlink);
 
 	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
 	devlink_notify_register(devlink);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_register);
+
+void devlink_register(struct devlink *devlink)
+{
+	devl_lock(devlink);
+	devl_register(devlink);
+	devl_unlock(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_register);
 
 /**
- *	devlink_unregister - Unregister devlink instance
- *
- *	@devlink: devlink
+ * devl_unregister - Unregister devlink instance
+ * @devlink: devlink
  */
-void devlink_unregister(struct devlink *devlink)
+void devl_unregister(struct devlink *devlink)
 {
 	ASSERT_DEVLINK_REGISTERED(devlink);
-	/* Make sure that we are in .remove() routine */
-
-	xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
-	devlink_put(devlink);
-	wait_for_completion(&devlink->comp);
+	devl_assert_locked(devlink);
 
 	devlink_notify_unregister(devlink);
 	xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
-	xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
+}
+EXPORT_SYMBOL_GPL(devl_unregister);
+
+void devlink_unregister(struct devlink *devlink)
+{
+	devl_lock(devlink);
+	devl_unregister(devlink);
+	devl_unlock(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
@@ -252,7 +245,6 @@  struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
 	mutex_init(&devlink->reporters_lock);
 	mutex_init(&devlink->linecards_lock);
 	refcount_set(&devlink->refcount, 1);
-	init_completion(&devlink->comp);
 
 	return devlink;
 
@@ -298,7 +290,7 @@  void devlink_free(struct devlink *devlink)
 
 	xa_erase(&devlinks, devlink->index);
 
-	kfree(devlink);
+	devlink_put(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_free);
 
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index c3977c69552a..7e77eebde3b9 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -12,7 +12,6 @@ 
 #include <net/net_namespace.h>
 
 #define DEVLINK_REGISTERED XA_MARK_1
-#define DEVLINK_UNREGISTERING XA_MARK_2
 
 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
 	(__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
@@ -52,7 +51,6 @@  struct devlink {
 	struct lock_class_key lock_key;
 	u8 reload_failed:1;
 	refcount_t refcount;
-	struct completion comp;
 	struct rcu_head rcu;
 	struct notifier_block netdevice_nb;
 	char priv[] __aligned(NETDEV_ALIGN);