diff mbox series

[v2,2/5] exec: Factor unshare_sighand out of de_thread and call it separately

Message ID 87k13u5y26.fsf_-_@x220.int.ebiederm.org (mailing list archive)
State New, archived
Headers show
Series Infrastructure to allow fixing exec deadlocks | expand

Commit Message

Eric W. Biederman March 8, 2020, 9:36 p.m. UTC
This makes the code clearer and makes it easier to implement a mutex
that is not taken over any locations that may block indefinitely waiting
for userspace.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

Comments

Bernd Edlinger March 9, 2020, 7:28 p.m. UTC | #1
On 3/8/20 10:36 PM, Eric W. Biederman wrote:
> 
> This makes the code clearer and makes it easier to implement a mutex
> that is not taken over any locations that may block indefinitely waiting
> for userspace.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>


Bernd.
> ---
>  fs/exec.c | 39 ++++++++++++++++++++++++++-------------
>  1 file changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index c3f34791f2f0..ff74b9a74d34 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk)
>  	flush_itimer_signals();
>  #endif
>  
> +	BUG_ON(!thread_group_leader(tsk));
> +	return 0;
> +
> +killed:
> +	/* protects against exit_notify() and __exit_signal() */
> +	read_lock(&tasklist_lock);
> +	sig->group_exit_task = NULL;
> +	sig->notify_count = 0;
> +	read_unlock(&tasklist_lock);
> +	return -EAGAIN;
> +}
> +
> +
> +static int unshare_sighand(struct task_struct *me)
> +{
> +	struct sighand_struct *oldsighand = me->sighand;
> +
>  	if (refcount_read(&oldsighand->count) != 1) {
>  		struct sighand_struct *newsighand;
>  		/*
> @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk)
>  
>  		write_lock_irq(&tasklist_lock);
>  		spin_lock(&oldsighand->siglock);
> -		rcu_assign_pointer(tsk->sighand, newsighand);
> +		rcu_assign_pointer(me->sighand, newsighand);
>  		spin_unlock(&oldsighand->siglock);
>  		write_unlock_irq(&tasklist_lock);
>  
>  		__cleanup_sighand(oldsighand);
>  	}
> -
> -	BUG_ON(!thread_group_leader(tsk));
>  	return 0;
> -
> -killed:
> -	/* protects against exit_notify() and __exit_signal() */
> -	read_lock(&tasklist_lock);
> -	sig->group_exit_task = NULL;
> -	sig->notify_count = 0;
> -	read_unlock(&tasklist_lock);
> -	return -EAGAIN;
>  }
>  
>  char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
> @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm)
>  	int retval;
>  
>  	/*
> -	 * Make sure we have a private signal table and that
> -	 * we are unassociated from the previous thread group.
> +	 * Make this the only thread in the thread group.
>  	 */
>  	retval = de_thread(me);
>  	if (retval)
>  		goto out;
>  
> +	/*
> +	 * Make the signal table private.
> +	 */
> +	retval = unshare_sighand(me);
> +	if (retval)
> +		goto out;
> +
>  	/*
>  	 * Must be called _before_ exec_mmap() as bprm->mm is
>  	 * not visibile until then. This also enables the update
>
Kees Cook March 10, 2020, 8:29 p.m. UTC | #2
On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote:
> 
> This makes the code clearer and makes it easier to implement a mutex
> that is not taken over any locations that may block indefinitely waiting
> for userspace.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>  fs/exec.c | 39 ++++++++++++++++++++++++++-------------
>  1 file changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index c3f34791f2f0..ff74b9a74d34 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk)
>  	flush_itimer_signals();
>  #endif

Semi-related (existing behavior): in de_thread(), what keeps the thread
group from changing? i.e.:

        if (thread_group_empty(tsk))
                goto no_thread_group;

        /*
         * Kill all other threads in the thread group.
         */
        spin_lock_irq(lock);
	... kill other threads under lock ...

Why is the thread_group_emtpy() test not under lock?

>  
> +	BUG_ON(!thread_group_leader(tsk));
> +	return 0;
> +
> +killed:
> +	/* protects against exit_notify() and __exit_signal() */

I wonder if include/linux/sched/task.h's definition of tasklist_lock
should explicitly gain note about group_exit_task and notify_count,
or, alternatively, signal.h's section on these fields should gain a
comment? tasklist_lock is unmentioned in signal.h... :(

> +	read_lock(&tasklist_lock);
> +	sig->group_exit_task = NULL;
> +	sig->notify_count = 0;
> +	read_unlock(&tasklist_lock);
> +	return -EAGAIN;
> +}
> +
> +
> +static int unshare_sighand(struct task_struct *me)
> +{
> +	struct sighand_struct *oldsighand = me->sighand;
> +
>  	if (refcount_read(&oldsighand->count) != 1) {
>  		struct sighand_struct *newsighand;
>  		/*
> @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk)
>  
>  		write_lock_irq(&tasklist_lock);
>  		spin_lock(&oldsighand->siglock);
> -		rcu_assign_pointer(tsk->sighand, newsighand);
> +		rcu_assign_pointer(me->sighand, newsighand);
>  		spin_unlock(&oldsighand->siglock);
>  		write_unlock_irq(&tasklist_lock);
>  
>  		__cleanup_sighand(oldsighand);
>  	}
> -
> -	BUG_ON(!thread_group_leader(tsk));
>  	return 0;
> -
> -killed:
> -	/* protects against exit_notify() and __exit_signal() */
> -	read_lock(&tasklist_lock);
> -	sig->group_exit_task = NULL;
> -	sig->notify_count = 0;
> -	read_unlock(&tasklist_lock);
> -	return -EAGAIN;
>  }
>  
>  char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
> @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm)
>  	int retval;
>  
>  	/*
> -	 * Make sure we have a private signal table and that
> -	 * we are unassociated from the previous thread group.
> +	 * Make this the only thread in the thread group.
>  	 */
>  	retval = de_thread(me);
>  	if (retval)
>  		goto out;
>  
> +	/*
> +	 * Make the signal table private.
> +	 */
> +	retval = unshare_sighand(me);
> +	if (retval)
> +		goto out;
> +
>  	/*
>  	 * Must be called _before_ exec_mmap() as bprm->mm is
>  	 * not visibile until then. This also enables the update
> -- 
> 2.25.0

Otherwise, yes, sensible separation.

Reviewed-by: Kees Cook <keescook@chromium.org>
Bernd Edlinger March 10, 2020, 8:34 p.m. UTC | #3
On 3/10/20 9:29 PM, Kees Cook wrote:
> On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote:
>>
>> This makes the code clearer and makes it easier to implement a mutex
>> that is not taken over any locations that may block indefinitely waiting
>> for userspace.
>>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> ---
>>  fs/exec.c | 39 ++++++++++++++++++++++++++-------------
>>  1 file changed, 26 insertions(+), 13 deletions(-)
>>
>> diff --git a/fs/exec.c b/fs/exec.c
>> index c3f34791f2f0..ff74b9a74d34 100644
>> --- a/fs/exec.c
>> +++ b/fs/exec.c
>> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk)
>>  	flush_itimer_signals();
>>  #endif
> 
> Semi-related (existing behavior): in de_thread(), what keeps the thread
> group from changing? i.e.:
> 
>         if (thread_group_empty(tsk))
>                 goto no_thread_group;
> 
>         /*
>          * Kill all other threads in the thread group.
>          */
>         spin_lock_irq(lock);
> 	... kill other threads under lock ...
> 
> Why is the thread_group_emtpy() test not under lock?
> 

A new thread cannot created when only one thread is executing,
right?

>>  
>> +	BUG_ON(!thread_group_leader(tsk));
>> +	return 0;
>> +
>> +killed:
>> +	/* protects against exit_notify() and __exit_signal() */
> 
> I wonder if include/linux/sched/task.h's definition of tasklist_lock
> should explicitly gain note about group_exit_task and notify_count,
> or, alternatively, signal.h's section on these fields should gain a
> comment? tasklist_lock is unmentioned in signal.h... :(
> 
>> +	read_lock(&tasklist_lock);
>> +	sig->group_exit_task = NULL;
>> +	sig->notify_count = 0;
>> +	read_unlock(&tasklist_lock);
>> +	return -EAGAIN;
>> +}
>> +
>> +
>> +static int unshare_sighand(struct task_struct *me)
>> +{
>> +	struct sighand_struct *oldsighand = me->sighand;
>> +
>>  	if (refcount_read(&oldsighand->count) != 1) {
>>  		struct sighand_struct *newsighand;
>>  		/*
>> @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk)
>>  
>>  		write_lock_irq(&tasklist_lock);
>>  		spin_lock(&oldsighand->siglock);
>> -		rcu_assign_pointer(tsk->sighand, newsighand);
>> +		rcu_assign_pointer(me->sighand, newsighand);
>>  		spin_unlock(&oldsighand->siglock);
>>  		write_unlock_irq(&tasklist_lock);
>>  
>>  		__cleanup_sighand(oldsighand);
>>  	}
>> -
>> -	BUG_ON(!thread_group_leader(tsk));
>>  	return 0;
>> -
>> -killed:
>> -	/* protects against exit_notify() and __exit_signal() */
>> -	read_lock(&tasklist_lock);
>> -	sig->group_exit_task = NULL;
>> -	sig->notify_count = 0;
>> -	read_unlock(&tasklist_lock);
>> -	return -EAGAIN;
>>  }
>>  
>>  char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
>> @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm)
>>  	int retval;
>>  
>>  	/*
>> -	 * Make sure we have a private signal table and that
>> -	 * we are unassociated from the previous thread group.
>> +	 * Make this the only thread in the thread group.
>>  	 */
>>  	retval = de_thread(me);
>>  	if (retval)
>>  		goto out;
>>  
>> +	/*
>> +	 * Make the signal table private.
>> +	 */
>> +	retval = unshare_sighand(me);
>> +	if (retval)
>> +		goto out;
>> +
>>  	/*
>>  	 * Must be called _before_ exec_mmap() as bprm->mm is
>>  	 * not visibile until then. This also enables the update
>> -- 
>> 2.25.0
> 
> Otherwise, yes, sensible separation.
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
>
Kees Cook March 10, 2020, 8:57 p.m. UTC | #4
On Tue, Mar 10, 2020 at 09:34:03PM +0100, Bernd Edlinger wrote:
> On 3/10/20 9:29 PM, Kees Cook wrote:
> > On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote:
> >>
> >> This makes the code clearer and makes it easier to implement a mutex
> >> that is not taken over any locations that may block indefinitely waiting
> >> for userspace.
> >>
> >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >> ---
> >>  fs/exec.c | 39 ++++++++++++++++++++++++++-------------
> >>  1 file changed, 26 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/fs/exec.c b/fs/exec.c
> >> index c3f34791f2f0..ff74b9a74d34 100644
> >> --- a/fs/exec.c
> >> +++ b/fs/exec.c
> >> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk)
> >>  	flush_itimer_signals();
> >>  #endif
> > 
> > Semi-related (existing behavior): in de_thread(), what keeps the thread
> > group from changing? i.e.:
> > 
> >         if (thread_group_empty(tsk))
> >                 goto no_thread_group;
> > 
> >         /*
> >          * Kill all other threads in the thread group.
> >          */
> >         spin_lock_irq(lock);
> > 	... kill other threads under lock ...
> > 
> > Why is the thread_group_emtpy() test not under lock?
> > 
> 
> A new thread cannot created when only one thread is executing,
> right?

*face palm* Yes, of course. :) I'm thinking too hard.
Christian Brauner March 10, 2020, 9:21 p.m. UTC | #5
On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote:
> 
> This makes the code clearer and makes it easier to implement a mutex
> that is not taken over any locations that may block indefinitely waiting
> for userspace.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>  fs/exec.c | 39 ++++++++++++++++++++++++++-------------
>  1 file changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index c3f34791f2f0..ff74b9a74d34 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk)
>  	flush_itimer_signals();
>  #endif
>  
> +	BUG_ON(!thread_group_leader(tsk));
> +	return 0;
> +
> +killed:
> +	/* protects against exit_notify() and __exit_signal() */
> +	read_lock(&tasklist_lock);
> +	sig->group_exit_task = NULL;
> +	sig->notify_count = 0;
> +	read_unlock(&tasklist_lock);
> +	return -EAGAIN;
> +}
> +
> +
> +static int unshare_sighand(struct task_struct *me)
> +{
> +	struct sighand_struct *oldsighand = me->sighand;
> +
>  	if (refcount_read(&oldsighand->count) != 1) {
>  		struct sighand_struct *newsighand;
>  		/*
> @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk)
>  
>  		write_lock_irq(&tasklist_lock);
>  		spin_lock(&oldsighand->siglock);
> -		rcu_assign_pointer(tsk->sighand, newsighand);
> +		rcu_assign_pointer(me->sighand, newsighand);
>  		spin_unlock(&oldsighand->siglock);
>  		write_unlock_irq(&tasklist_lock);
>  
>  		__cleanup_sighand(oldsighand);
>  	}

This is fine for now but we share an aweful lot of code with
copy_sighand(). We should earmark this to look into consolidating the
core operations into a common helper called from both copy_sighand() and
unshare_sighand() maybe even dumbing it down to one helper. But not
needed for now.

Otherwise:
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index c3f34791f2f0..ff74b9a74d34 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1194,6 +1194,23 @@  static int de_thread(struct task_struct *tsk)
 	flush_itimer_signals();
 #endif
 
+	BUG_ON(!thread_group_leader(tsk));
+	return 0;
+
+killed:
+	/* protects against exit_notify() and __exit_signal() */
+	read_lock(&tasklist_lock);
+	sig->group_exit_task = NULL;
+	sig->notify_count = 0;
+	read_unlock(&tasklist_lock);
+	return -EAGAIN;
+}
+
+
+static int unshare_sighand(struct task_struct *me)
+{
+	struct sighand_struct *oldsighand = me->sighand;
+
 	if (refcount_read(&oldsighand->count) != 1) {
 		struct sighand_struct *newsighand;
 		/*
@@ -1210,23 +1227,13 @@  static int de_thread(struct task_struct *tsk)
 
 		write_lock_irq(&tasklist_lock);
 		spin_lock(&oldsighand->siglock);
-		rcu_assign_pointer(tsk->sighand, newsighand);
+		rcu_assign_pointer(me->sighand, newsighand);
 		spin_unlock(&oldsighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 
 		__cleanup_sighand(oldsighand);
 	}
-
-	BUG_ON(!thread_group_leader(tsk));
 	return 0;
-
-killed:
-	/* protects against exit_notify() and __exit_signal() */
-	read_lock(&tasklist_lock);
-	sig->group_exit_task = NULL;
-	sig->notify_count = 0;
-	read_unlock(&tasklist_lock);
-	return -EAGAIN;
 }
 
 char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
@@ -1264,13 +1271,19 @@  int flush_old_exec(struct linux_binprm * bprm)
 	int retval;
 
 	/*
-	 * Make sure we have a private signal table and that
-	 * we are unassociated from the previous thread group.
+	 * Make this the only thread in the thread group.
 	 */
 	retval = de_thread(me);
 	if (retval)
 		goto out;
 
+	/*
+	 * Make the signal table private.
+	 */
+	retval = unshare_sighand(me);
+	if (retval)
+		goto out;
+
 	/*
 	 * Must be called _before_ exec_mmap() as bprm->mm is
 	 * not visibile until then. This also enables the update