diff mbox series

[RFC,v2] Convert struct pid count to refcount_t

Message ID 20190624184534.209896-1-joel@joelfernandes.org (mailing list archive)
State New, archived
Headers show
Series [RFC,v2] Convert struct pid count to refcount_t | expand

Commit Message

Joel Fernandes June 24, 2019, 6:45 p.m. UTC
struct pid's count is an atomic_t field used as a refcount. Use
refcount_t for it which is basically atomic_t but does additional
checking to prevent use-after-free bugs.

For memory ordering, the only change is with the following:
 -	if ((atomic_read(&pid->count) == 1) ||
 -	     atomic_dec_and_test(&pid->count)) {
 +	if (refcount_dec_and_test(&pid->count)) {
 		kmem_cache_free(ns->pid_cachep, pid);

Here the change is from:
Fully ordered --> RELEASE + ACQUIRE (as per refcount-vs-atomic.rst)
This ACQUIRE should take care of making sure the free happens after the
refcount_dec_and_test().

The above hunk also removes atomic_read() since it is not needed for the
code to work and it is unclear how beneficial it is. The removal lets
refcount_dec_and_test() check for cases where get_pid() happened before
the object was freed.

Cc: jannh@google.com
Cc: oleg@redhat.com
Cc: mathieu.desnoyers@efficios.com
Cc: willy@infradead.org
Cc: peterz@infradead.org
Cc: will.deacon@arm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: elena.reshetova@intel.com
Cc: keescook@chromium.org
Cc: kernel-team@android.com
Cc: kernel-hardening@lists.openwall.com
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
Changed to RFC to get any feedback on the memory ordering.


 include/linux/pid.h | 5 +++--
 kernel/pid.c        | 7 +++----
 2 files changed, 6 insertions(+), 6 deletions(-)

Comments

Joel Fernandes June 24, 2019, 6:52 p.m. UTC | #1
On Mon, Jun 24, 2019 at 02:45:34PM -0400, Joel Fernandes (Google) wrote:
> struct pid's count is an atomic_t field used as a refcount. Use
> refcount_t for it which is basically atomic_t but does additional
> checking to prevent use-after-free bugs.
> 
> For memory ordering, the only change is with the following:
>  -	if ((atomic_read(&pid->count) == 1) ||
>  -	     atomic_dec_and_test(&pid->count)) {
>  +	if (refcount_dec_and_test(&pid->count)) {
>  		kmem_cache_free(ns->pid_cachep, pid);
> 
> Here the change is from:
> Fully ordered --> RELEASE + ACQUIRE (as per refcount-vs-atomic.rst)
> This ACQUIRE should take care of making sure the free happens after the
> refcount_dec_and_test().
> 
> The above hunk also removes atomic_read() since it is not needed for the
> code to work and it is unclear how beneficial it is. The removal lets
> refcount_dec_and_test() check for cases where get_pid() happened before
> the object was freed.
> 
> Cc: jannh@google.com
> Cc: oleg@redhat.com
> Cc: mathieu.desnoyers@efficios.com
> Cc: willy@infradead.org
> Cc: peterz@infradead.org
> Cc: will.deacon@arm.com
> Cc: paulmck@linux.vnet.ibm.com
> Cc: elena.reshetova@intel.com
> Cc: keescook@chromium.org
> Cc: kernel-team@android.com
> Cc: kernel-hardening@lists.openwall.com
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> 
> ---
> Changed to RFC to get any feedback on the memory ordering.

I had a question about refcount_inc().

As per Documentation/core-api/refcount-vs-atomic.rst , it says:

A control dependency (on success) for refcounters guarantees that
if a reference for an object was successfully obtained (reference
counter increment or addition happened, function returned true),
then further stores are ordered against this operation.

However, in refcount_inc() I don't see any memory barriers (in the case where
CONFIG_REFCOUNT_FULL=n). Is the documentation wrong?

get_pid() does a refcount_inc() but doesn't have any memory barriers either.

thanks,

 - Joel


> 
>  include/linux/pid.h | 5 +++--
>  kernel/pid.c        | 7 +++----
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 14a9a39da9c7..8cb86d377ff5 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -3,6 +3,7 @@
>  #define _LINUX_PID_H
>  
>  #include <linux/rculist.h>
> +#include <linux/refcount.h>
>  
>  enum pid_type
>  {
> @@ -56,7 +57,7 @@ struct upid {
>  
>  struct pid
>  {
> -	atomic_t count;
> +	refcount_t count;
>  	unsigned int level;
>  	/* lists of tasks that use this pid */
>  	struct hlist_head tasks[PIDTYPE_MAX];
> @@ -69,7 +70,7 @@ extern struct pid init_struct_pid;
>  static inline struct pid *get_pid(struct pid *pid)
>  {
>  	if (pid)
> -		atomic_inc(&pid->count);
> +		refcount_inc(&pid->count);
>  	return pid;
>  }
>  
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 20881598bdfa..89c4849fab5d 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -37,7 +37,7 @@
>  #include <linux/init_task.h>
>  #include <linux/syscalls.h>
>  #include <linux/proc_ns.h>
> -#include <linux/proc_fs.h>
> +#include <linux/refcount.h>
>  #include <linux/sched/task.h>
>  #include <linux/idr.h>
>  
> @@ -106,8 +106,7 @@ void put_pid(struct pid *pid)
>  		return;
>  
>  	ns = pid->numbers[pid->level].ns;
> -	if ((atomic_read(&pid->count) == 1) ||
> -	     atomic_dec_and_test(&pid->count)) {
> +	if (refcount_dec_and_test(&pid->count)) {
>  		kmem_cache_free(ns->pid_cachep, pid);
>  		put_pid_ns(ns);
>  	}
> @@ -210,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
>  	}
>  
>  	get_pid_ns(ns);
> -	atomic_set(&pid->count, 1);
> +	refcount_set(&pid->count, 1);
>  	for (type = 0; type < PIDTYPE_MAX; ++type)
>  		INIT_HLIST_HEAD(&pid->tasks[type]);
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
Jann Horn June 24, 2019, 7:10 p.m. UTC | #2
On Mon, Jun 24, 2019 at 8:52 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> On Mon, Jun 24, 2019 at 02:45:34PM -0400, Joel Fernandes (Google) wrote:
> > struct pid's count is an atomic_t field used as a refcount. Use
> > refcount_t for it which is basically atomic_t but does additional
> > checking to prevent use-after-free bugs.
> >
> > For memory ordering, the only change is with the following:
> >  -    if ((atomic_read(&pid->count) == 1) ||
> >  -         atomic_dec_and_test(&pid->count)) {
> >  +    if (refcount_dec_and_test(&pid->count)) {
> >               kmem_cache_free(ns->pid_cachep, pid);
> >
> > Here the change is from:
> > Fully ordered --> RELEASE + ACQUIRE (as per refcount-vs-atomic.rst)
> > This ACQUIRE should take care of making sure the free happens after the
> > refcount_dec_and_test().
> >
> > The above hunk also removes atomic_read() since it is not needed for the
> > code to work and it is unclear how beneficial it is. The removal lets
> > refcount_dec_and_test() check for cases where get_pid() happened before
> > the object was freed.
[...]
> I had a question about refcount_inc().
>
> As per Documentation/core-api/refcount-vs-atomic.rst , it says:
>
> A control dependency (on success) for refcounters guarantees that
> if a reference for an object was successfully obtained (reference
> counter increment or addition happened, function returned true),
> then further stores are ordered against this operation.
>
> However, in refcount_inc() I don't see any memory barriers (in the case where
> CONFIG_REFCOUNT_FULL=n). Is the documentation wrong?

That part of the documentation only talks about cases where you have a
control dependency on the return value of the refcount operation. But
refcount_inc() does not return a value, so this isn't relevant for
refcount_inc().

Also, AFAIU, the control dependency mentioned in the documentation has
to exist *in the caller* - it's just pointing out that if you write
code like the following, you have a control dependency between the
refcount operation and the write:

    if (refcount_inc_not_zero(&obj->refcount)) {
      WRITE_ONCE(obj->x, y);
    }

For more information on the details of this stuff, try reading the
section "CONTROL DEPENDENCIES" of Documentation/memory-barriers.txt.
Joel Fernandes June 24, 2019, 8:43 p.m. UTC | #3
On Mon, Jun 24, 2019 at 3:10 PM Jann Horn <jannh@google.com> wrote:
>
> On Mon, Jun 24, 2019 at 8:52 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> > On Mon, Jun 24, 2019 at 02:45:34PM -0400, Joel Fernandes (Google) wrote:
> > > struct pid's count is an atomic_t field used as a refcount. Use
> > > refcount_t for it which is basically atomic_t but does additional
> > > checking to prevent use-after-free bugs.
> > >
> > > For memory ordering, the only change is with the following:
> > >  -    if ((atomic_read(&pid->count) == 1) ||
> > >  -         atomic_dec_and_test(&pid->count)) {
> > >  +    if (refcount_dec_and_test(&pid->count)) {
> > >               kmem_cache_free(ns->pid_cachep, pid);
> > >
> > > Here the change is from:
> > > Fully ordered --> RELEASE + ACQUIRE (as per refcount-vs-atomic.rst)
> > > This ACQUIRE should take care of making sure the free happens after the
> > > refcount_dec_and_test().
> > >
> > > The above hunk also removes atomic_read() since it is not needed for the
> > > code to work and it is unclear how beneficial it is. The removal lets
> > > refcount_dec_and_test() check for cases where get_pid() happened before
> > > the object was freed.
> [...]
> > I had a question about refcount_inc().
> >
> > As per Documentation/core-api/refcount-vs-atomic.rst , it says:
> >
> > A control dependency (on success) for refcounters guarantees that
> > if a reference for an object was successfully obtained (reference
> > counter increment or addition happened, function returned true),
> > then further stores are ordered against this operation.
> >
> > However, in refcount_inc() I don't see any memory barriers (in the case where
> > CONFIG_REFCOUNT_FULL=n). Is the documentation wrong?
>
> That part of the documentation only talks about cases where you have a
> control dependency on the return value of the refcount operation. But
> refcount_inc() does not return a value, so this isn't relevant for
> refcount_inc().
>
> Also, AFAIU, the control dependency mentioned in the documentation has
> to exist *in the caller* - it's just pointing out that if you write
> code like the following, you have a control dependency between the
> refcount operation and the write:
>
>     if (refcount_inc_not_zero(&obj->refcount)) {
>       WRITE_ONCE(obj->x, y);
>     }
>
> For more information on the details of this stuff, try reading the
> section "CONTROL DEPENDENCIES" of Documentation/memory-barriers.txt.

Makes sense now, thank you Jann!

 - Joel
Peter Zijlstra June 25, 2019, 7:34 a.m. UTC | #4
On Mon, Jun 24, 2019 at 09:10:15PM +0200, Jann Horn wrote:
> That part of the documentation only talks about cases where you have a
> control dependency on the return value of the refcount operation. But
> refcount_inc() does not return a value, so this isn't relevant for
> refcount_inc().
> 
> Also, AFAIU, the control dependency mentioned in the documentation has
> to exist *in the caller* - it's just pointing out that if you write
> code like the following, you have a control dependency between the
> refcount operation and the write:
> 
>     if (refcount_inc_not_zero(&obj->refcount)) {
>       WRITE_ONCE(obj->x, y);
>     }
> 
> For more information on the details of this stuff, try reading the
> section "CONTROL DEPENDENCIES" of Documentation/memory-barriers.txt.

IIRC the argument went as follows:

 - if you use refcount_inc(), you've already got a stable object and
   have ACQUIRED it otherwise, typically through locking.

 - if you use refcount_inc_not_zero(), you have a semi stable object
   (RCU), but you still need to ensure any changes to the object happen
   after acquiring a reference, and this is where the control dependency
   comes in as Jann already explained.

Specifically, it would be bad to allow STOREs to happen before we know
the refcount isn't 0, as that would be a UaF.

Also see the comment in lib/refcount.c.
Joel Fernandes June 26, 2019, 9:50 p.m. UTC | #5
On Tue, Jun 25, 2019 at 09:34:07AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 24, 2019 at 09:10:15PM +0200, Jann Horn wrote:
> > That part of the documentation only talks about cases where you have a
> > control dependency on the return value of the refcount operation. But
> > refcount_inc() does not return a value, so this isn't relevant for
> > refcount_inc().
> > 
> > Also, AFAIU, the control dependency mentioned in the documentation has
> > to exist *in the caller* - it's just pointing out that if you write
> > code like the following, you have a control dependency between the
> > refcount operation and the write:
> > 
> >     if (refcount_inc_not_zero(&obj->refcount)) {
> >       WRITE_ONCE(obj->x, y);
> >     }
> > 
> > For more information on the details of this stuff, try reading the
> > section "CONTROL DEPENDENCIES" of Documentation/memory-barriers.txt.
> 
> IIRC the argument went as follows:
> 
>  - if you use refcount_inc(), you've already got a stable object and
>    have ACQUIRED it otherwise, typically through locking.
> 
>  - if you use refcount_inc_not_zero(), you have a semi stable object
>    (RCU), but you still need to ensure any changes to the object happen
>    after acquiring a reference, and this is where the control dependency
>    comes in as Jann already explained.
> 
> Specifically, it would be bad to allow STOREs to happen before we know
> the refcount isn't 0, as that would be a UaF.
> 
> Also see the comment in lib/refcount.c.
> 

Thanks a lot for the explanations and the pointers to the comment in
lib/refcount.c . It makes it really clearly.

Also, does this patch look good to you? If so and if ok with you, could you
Ack it? The patch is not really "RFC" but I still tagged it as such since I
wanted to have this discussion.

Thanks!

- Joel
Andrea Parri June 29, 2019, 2:30 p.m. UTC | #6
On Mon, Jun 24, 2019 at 02:45:34PM -0400, Joel Fernandes (Google) wrote:
> struct pid's count is an atomic_t field used as a refcount. Use
> refcount_t for it which is basically atomic_t but does additional
> checking to prevent use-after-free bugs.
> 
> For memory ordering, the only change is with the following:
>  -	if ((atomic_read(&pid->count) == 1) ||
>  -	     atomic_dec_and_test(&pid->count)) {
>  +	if (refcount_dec_and_test(&pid->count)) {
>  		kmem_cache_free(ns->pid_cachep, pid);
> 
> Here the change is from:
> Fully ordered --> RELEASE + ACQUIRE (as per refcount-vs-atomic.rst)
> This ACQUIRE should take care of making sure the free happens after the
> refcount_dec_and_test().
> 
> The above hunk also removes atomic_read() since it is not needed for the
> code to work and it is unclear how beneficial it is. The removal lets
> refcount_dec_and_test() check for cases where get_pid() happened before
> the object was freed.
> 
> Cc: jannh@google.com
> Cc: oleg@redhat.com
> Cc: mathieu.desnoyers@efficios.com
> Cc: willy@infradead.org
> Cc: peterz@infradead.org
> Cc: will.deacon@arm.com
> Cc: paulmck@linux.vnet.ibm.com
> Cc: elena.reshetova@intel.com
> Cc: keescook@chromium.org
> Cc: kernel-team@android.com
> Cc: kernel-hardening@lists.openwall.com
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

As always with these matters, it's quite possible that I'm missing
something subtle here; said this, ;-)  the patch does look good to
me: FWIW,

Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>

Thanks,
  Andrea


> 
> ---
> Changed to RFC to get any feedback on the memory ordering.
> 
> 
>  include/linux/pid.h | 5 +++--
>  kernel/pid.c        | 7 +++----
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 14a9a39da9c7..8cb86d377ff5 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -3,6 +3,7 @@
>  #define _LINUX_PID_H
>  
>  #include <linux/rculist.h>
> +#include <linux/refcount.h>
>  
>  enum pid_type
>  {
> @@ -56,7 +57,7 @@ struct upid {
>  
>  struct pid
>  {
> -	atomic_t count;
> +	refcount_t count;
>  	unsigned int level;
>  	/* lists of tasks that use this pid */
>  	struct hlist_head tasks[PIDTYPE_MAX];
> @@ -69,7 +70,7 @@ extern struct pid init_struct_pid;
>  static inline struct pid *get_pid(struct pid *pid)
>  {
>  	if (pid)
> -		atomic_inc(&pid->count);
> +		refcount_inc(&pid->count);
>  	return pid;
>  }
>  
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 20881598bdfa..89c4849fab5d 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -37,7 +37,7 @@
>  #include <linux/init_task.h>
>  #include <linux/syscalls.h>
>  #include <linux/proc_ns.h>
> -#include <linux/proc_fs.h>
> +#include <linux/refcount.h>
>  #include <linux/sched/task.h>
>  #include <linux/idr.h>
>  
> @@ -106,8 +106,7 @@ void put_pid(struct pid *pid)
>  		return;
>  
>  	ns = pid->numbers[pid->level].ns;
> -	if ((atomic_read(&pid->count) == 1) ||
> -	     atomic_dec_and_test(&pid->count)) {
> +	if (refcount_dec_and_test(&pid->count)) {
>  		kmem_cache_free(ns->pid_cachep, pid);
>  		put_pid_ns(ns);
>  	}
> @@ -210,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
>  	}
>  
>  	get_pid_ns(ns);
> -	atomic_set(&pid->count, 1);
> +	refcount_set(&pid->count, 1);
>  	for (type = 0; type < PIDTYPE_MAX; ++type)
>  		INIT_HLIST_HEAD(&pid->tasks[type]);
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
diff mbox series

Patch

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 14a9a39da9c7..8cb86d377ff5 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -3,6 +3,7 @@ 
 #define _LINUX_PID_H
 
 #include <linux/rculist.h>
+#include <linux/refcount.h>
 
 enum pid_type
 {
@@ -56,7 +57,7 @@  struct upid {
 
 struct pid
 {
-	atomic_t count;
+	refcount_t count;
 	unsigned int level;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
@@ -69,7 +70,7 @@  extern struct pid init_struct_pid;
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
-		atomic_inc(&pid->count);
+		refcount_inc(&pid->count);
 	return pid;
 }
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 20881598bdfa..89c4849fab5d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -37,7 +37,7 @@ 
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
 #include <linux/proc_ns.h>
-#include <linux/proc_fs.h>
+#include <linux/refcount.h>
 #include <linux/sched/task.h>
 #include <linux/idr.h>
 
@@ -106,8 +106,7 @@  void put_pid(struct pid *pid)
 		return;
 
 	ns = pid->numbers[pid->level].ns;
-	if ((atomic_read(&pid->count) == 1) ||
-	     atomic_dec_and_test(&pid->count)) {
+	if (refcount_dec_and_test(&pid->count)) {
 		kmem_cache_free(ns->pid_cachep, pid);
 		put_pid_ns(ns);
 	}
@@ -210,7 +209,7 @@  struct pid *alloc_pid(struct pid_namespace *ns)
 	}
 
 	get_pid_ns(ns);
-	atomic_set(&pid->count, 1);
+	refcount_set(&pid->count, 1);
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);