diff mbox series

uprobes: Improve scalability by reducing the contention on siglock

Message ID 20240801082407.1618451-1-liaochang1@huawei.com (mailing list archive)
State Superseded
Delegated to: Masami Hiramatsu
Headers show
Series uprobes: Improve scalability by reducing the contention on siglock | expand

Commit Message

Liao, Chang Aug. 1, 2024, 8:24 a.m. UTC
The profiling result of BPF selftest on ARM64 platform reveals the
significant contention on the current->sighand->siglock within the
handle_singlestep() is the scalability bottleneck. The reason is also
very straightforward that all producer threads of benchmark have to
contend the spinlock mentioned to resume the TIF_SIGPENDING bit in the
thread_info that might be removed in uprobe_deny_signal().

This patch introduces UTASK_SSTEP_DENY_SIGNAL to mark TIF_SIGPENDING is
suppress temporarily during the uprobe single-step. Upon uprobe single-step
is handled and UTASK_SSTEP_DENY_SIGNAL is confirmed, it could resume the
TIF_SIGPENDING directly without acquiring the siglock in most case, then
reducing contention and improving overall performance.

I've use the script developed by Andrii in [1] to run benchmark. The CPU
used was Kunpeng916 (Hi1616), 4 NUMA nodes, 64 cores@2.4GHz running
upstream kernel v6.11-rc1 + my optimization [2] for get_xol_insn_slot().

before-opt
----------
uprobe-nop      ( 1 cpus):    0.907 ± 0.003M/s  (  0.907M/s/cpu)
uprobe-nop      ( 2 cpus):    1.676 ± 0.008M/s  (  0.838M/s/cpu)
uprobe-nop      ( 4 cpus):    3.210 ± 0.003M/s  (  0.802M/s/cpu)
uprobe-nop      ( 8 cpus):    4.457 ± 0.003M/s  (  0.557M/s/cpu)
uprobe-nop      (16 cpus):    3.724 ± 0.011M/s  (  0.233M/s/cpu)
uprobe-nop      (32 cpus):    2.761 ± 0.003M/s  (  0.086M/s/cpu)
uprobe-nop      (64 cpus):    1.293 ± 0.015M/s  (  0.020M/s/cpu)

uprobe-push     ( 1 cpus):    0.883 ± 0.001M/s  (  0.883M/s/cpu)
uprobe-push     ( 2 cpus):    1.642 ± 0.005M/s  (  0.821M/s/cpu)
uprobe-push     ( 4 cpus):    3.086 ± 0.002M/s  (  0.771M/s/cpu)
uprobe-push     ( 8 cpus):    3.390 ± 0.003M/s  (  0.424M/s/cpu)
uprobe-push     (16 cpus):    2.652 ± 0.005M/s  (  0.166M/s/cpu)
uprobe-push     (32 cpus):    2.713 ± 0.005M/s  (  0.085M/s/cpu)
uprobe-push     (64 cpus):    1.313 ± 0.009M/s  (  0.021M/s/cpu)

uprobe-ret      ( 1 cpus):    1.774 ± 0.000M/s  (  1.774M/s/cpu)
uprobe-ret      ( 2 cpus):    3.350 ± 0.001M/s  (  1.675M/s/cpu)
uprobe-ret      ( 4 cpus):    6.604 ± 0.000M/s  (  1.651M/s/cpu)
uprobe-ret      ( 8 cpus):    6.706 ± 0.005M/s  (  0.838M/s/cpu)
uprobe-ret      (16 cpus):    5.231 ± 0.001M/s  (  0.327M/s/cpu)
uprobe-ret      (32 cpus):    5.743 ± 0.003M/s  (  0.179M/s/cpu)
uprobe-ret      (64 cpus):    4.726 ± 0.016M/s  (  0.074M/s/cpu)

after-opt
---------
uprobe-nop      ( 1 cpus):    0.985 ± 0.002M/s  (  0.985M/s/cpu)
uprobe-nop      ( 2 cpus):    1.773 ± 0.005M/s  (  0.887M/s/cpu)
uprobe-nop      ( 4 cpus):    3.304 ± 0.001M/s  (  0.826M/s/cpu)
uprobe-nop      ( 8 cpus):    5.328 ± 0.002M/s  (  0.666M/s/cpu)
uprobe-nop      (16 cpus):    6.475 ± 0.002M/s  (  0.405M/s/cpu)
uprobe-nop      (32 cpus):    4.831 ± 0.082M/s  (  0.151M/s/cpu)
uprobe-nop      (64 cpus):    2.564 ± 0.053M/s  (  0.040M/s/cpu)

uprobe-push     ( 1 cpus):    0.964 ± 0.001M/s  (  0.964M/s/cpu)
uprobe-push     ( 2 cpus):    1.766 ± 0.002M/s  (  0.883M/s/cpu)
uprobe-push     ( 4 cpus):    3.290 ± 0.009M/s  (  0.823M/s/cpu)
uprobe-push     ( 8 cpus):    4.670 ± 0.002M/s  (  0.584M/s/cpu)
uprobe-push     (16 cpus):    5.197 ± 0.004M/s  (  0.325M/s/cpu)
uprobe-push     (32 cpus):    5.068 ± 0.161M/s  (  0.158M/s/cpu)
uprobe-push     (64 cpus):    2.605 ± 0.026M/s  (  0.041M/s/cpu)

uprobe-ret      ( 1 cpus):    1.833 ± 0.001M/s  (  1.833M/s/cpu)
uprobe-ret      ( 2 cpus):    3.384 ± 0.003M/s  (  1.692M/s/cpu)
uprobe-ret      ( 4 cpus):    6.677 ± 0.004M/s  (  1.669M/s/cpu)
uprobe-ret      ( 8 cpus):    6.854 ± 0.005M/s  (  0.857M/s/cpu)
uprobe-ret      (16 cpus):    6.508 ± 0.006M/s  (  0.407M/s/cpu)
uprobe-ret      (32 cpus):    5.793 ± 0.009M/s  (  0.181M/s/cpu)
uprobe-ret      (64 cpus):    4.743 ± 0.016M/s  (  0.074M/s/cpu)

Above benchmark results demonstrates a obivious improvement in the
scalability of trig-uprobe-nop and trig-uprobe-push, the peak throughput
of which are from 4.5M/s to 6.4M/s and 3.3M/s to 5.1M/s individually.

[1] https://lore.kernel.org/all/20240731214256.3588718-1-andrii@kernel.org
[2] https://lore.kernel.org/all/20240727094405.1362496-1-liaochang1@huawei.com

Signed-off-by: Liao Chang <liaochang1@huawei.com>
---
 include/linux/uprobes.h |  1 +
 kernel/events/uprobes.c | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

Comments

Oleg Nesterov Aug. 1, 2024, 2:06 p.m. UTC | #1
On 08/01, Liao Chang wrote:
>
> @@ -2276,22 +2277,25 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
>  	int err = 0;
>  
>  	uprobe = utask->active_uprobe;
> -	if (utask->state == UTASK_SSTEP_ACK)
> +	switch (utask->state) {
> +	case UTASK_SSTEP_ACK:
>  		err = arch_uprobe_post_xol(&uprobe->arch, regs);
> -	else if (utask->state == UTASK_SSTEP_TRAPPED)
> +		break;
> +	case UTASK_SSTEP_TRAPPED:
>  		arch_uprobe_abort_xol(&uprobe->arch, regs);
> -	else
> +		fallthrough;
> +	case UTASK_SSTEP_DENY_SIGNAL:
> +		set_tsk_thread_flag(current, TIF_SIGPENDING);
> +		break;
> +	default:
>  		WARN_ON_ONCE(1);
> +	}

Liao, at first glance this change looks "obviously wrong" to me.

But let me read this patch more carefully and reply on weekend,
I am a bit busy right now.

Thanks,

Oleg.
Liao, Chang Aug. 2, 2024, 1:38 a.m. UTC | #2
在 2024/8/1 22:06, Oleg Nesterov 写道:
> On 08/01, Liao Chang wrote:
>>
>> @@ -2276,22 +2277,25 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
>>  	int err = 0;
>>  
>>  	uprobe = utask->active_uprobe;
>> -	if (utask->state == UTASK_SSTEP_ACK)
>> +	switch (utask->state) {
>> +	case UTASK_SSTEP_ACK:
>>  		err = arch_uprobe_post_xol(&uprobe->arch, regs);
>> -	else if (utask->state == UTASK_SSTEP_TRAPPED)
>> +		break;
>> +	case UTASK_SSTEP_TRAPPED:
>>  		arch_uprobe_abort_xol(&uprobe->arch, regs);
>> -	else
>> +		fallthrough;
>> +	case UTASK_SSTEP_DENY_SIGNAL:
>> +		set_tsk_thread_flag(current, TIF_SIGPENDING);
>> +		break;
>> +	default:
>>  		WARN_ON_ONCE(1);
>> +	}
> 
> Liao, at first glance this change looks "obviously wrong" to me.

Oleg. Did i overlook some thing obvious here?

> 
> But let me read this patch more carefully and reply on weekend,
> I am a bit busy right now.

Sure, thanks.

> 
> Thanks,
> 
> Oleg.
> 
>
Oleg Nesterov Aug. 2, 2024, 9:24 a.m. UTC | #3
On 08/02, Liao, Chang wrote:
>
>
> 在 2024/8/1 22:06, Oleg Nesterov 写道:
> > On 08/01, Liao Chang wrote:
> >>
> >> @@ -2276,22 +2277,25 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
> >>  	int err = 0;
> >>
> >>  	uprobe = utask->active_uprobe;
> >> -	if (utask->state == UTASK_SSTEP_ACK)
> >> +	switch (utask->state) {
> >> +	case UTASK_SSTEP_ACK:
> >>  		err = arch_uprobe_post_xol(&uprobe->arch, regs);
> >> -	else if (utask->state == UTASK_SSTEP_TRAPPED)
> >> +		break;
> >> +	case UTASK_SSTEP_TRAPPED:
> >>  		arch_uprobe_abort_xol(&uprobe->arch, regs);
> >> -	else
> >> +		fallthrough;
> >> +	case UTASK_SSTEP_DENY_SIGNAL:
> >> +		set_tsk_thread_flag(current, TIF_SIGPENDING);
> >> +		break;
> >> +	default:
> >>  		WARN_ON_ONCE(1);
> >> +	}
> >
> > Liao, at first glance this change looks "obviously wrong" to me.
>
> Oleg. Did i overlook some thing obvious here?

OK, lets suppose uprobe_deny_signal() sets UTASK_SSTEP_DENY_SIGNAL.

In this case handle_singlestep() will only set TIF_SIGPENDING and
do nothing else. This is wrong, either _post_xol() or _abort_xol()
must be called.

But I think handle_singlestep() will never hit this case. In the
likely case uprobe_post_sstep_notifier() will replace _DENY_SIGNAL
with _ACK, and this means that handle_singlestep() won't restore
TIF_SIGPENDING cleared by uprobe_deny_signal().

Oleg.
Liao, Chang Aug. 6, 2024, 3:06 a.m. UTC | #4
在 2024/8/2 17:24, Oleg Nesterov 写道:
> On 08/02, Liao, Chang wrote:
>>
>>
>> 在 2024/8/1 22:06, Oleg Nesterov 写道:
>>> On 08/01, Liao Chang wrote:
>>>>
>>>> @@ -2276,22 +2277,25 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
>>>>  	int err = 0;
>>>>
>>>>  	uprobe = utask->active_uprobe;
>>>> -	if (utask->state == UTASK_SSTEP_ACK)
>>>> +	switch (utask->state) {
>>>> +	case UTASK_SSTEP_ACK:
>>>>  		err = arch_uprobe_post_xol(&uprobe->arch, regs);
>>>> -	else if (utask->state == UTASK_SSTEP_TRAPPED)
>>>> +		break;
>>>> +	case UTASK_SSTEP_TRAPPED:
>>>>  		arch_uprobe_abort_xol(&uprobe->arch, regs);
>>>> -	else
>>>> +		fallthrough;
>>>> +	case UTASK_SSTEP_DENY_SIGNAL:
>>>> +		set_tsk_thread_flag(current, TIF_SIGPENDING);
>>>> +		break;
>>>> +	default:
>>>>  		WARN_ON_ONCE(1);
>>>> +	}
>>>
>>> Liao, at first glance this change looks "obviously wrong" to me.
>>
>> Oleg. Did i overlook some thing obvious here?
> 
> OK, lets suppose uprobe_deny_signal() sets UTASK_SSTEP_DENY_SIGNAL.
> 
> In this case handle_singlestep() will only set TIF_SIGPENDING and
> do nothing else. This is wrong, either _post_xol() or _abort_xol()
> must be called.
> 
> But I think handle_singlestep() will never hit this case. In the
> likely case uprobe_post_sstep_notifier() will replace _DENY_SIGNAL
> with _ACK, and this means that handle_singlestep() won't restore
> TIF_SIGPENDING cleared by uprobe_deny_signal().

You're absolutely right. handle_signlestep() has chance to handle _DENY_SIGANL
unless it followed by setting TIF_UPROBE in uprobe_deny_signal(). This means
_DENY_SIGNAL is likey replaced during next uprobe single-stepping.

I believe introducing _DENY_SIGNAL as the immediate state between UTASK_SSTEP
and UTASK_SSTEP_ACK is still necessary. This allow uprobe_post_sstep_notifier()
to correctly restore TIF_SIGPENDING upon the completion of single-step.

A revised implementation would look like this:

------------------%<------------------
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1980,6 +1980,7 @@ bool uprobe_deny_signal(void)

        if (task_sigpending(t)) {
                clear_tsk_thread_flag(t, TIF_SIGPENDING);
+               utask->state = UTASK_SSTEP_DENY_SIGNAL;

                if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
                        utask->state = UTASK_SSTEP_TRAPPED;
@@ -2276,22 +2277,23 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
        int err = 0;

        uprobe = utask->active_uprobe;
-       if (utask->state == UTASK_SSTEP_ACK)
+       switch (utask->state) {
+       case UTASK_SSTEP_ACK:
                err = arch_uprobe_post_xol(&uprobe->arch, regs);
-       else if (utask->state == UTASK_SSTEP_TRAPPED)
+               break;
+       case UTASK_SSTEP_TRAPPED:
                arch_uprobe_abort_xol(&uprobe->arch, regs);
-       else
+               set_thread_flag(TIF_SIGPENDING);
+               break;
+       default:
                WARN_ON_ONCE(1);
+       }

        put_uprobe(uprobe);
        utask->active_uprobe = NULL;
        utask->state = UTASK_RUNNING;
        xol_free_insn_slot(current);

-       spin_lock_irq(&current->sighand->siglock);
-       recalc_sigpending(); /* see uprobe_deny_signal() */
-       spin_unlock_irq(&current->sighand->siglock);
-
        if (unlikely(err)) {
                uprobe_warn(current, "execute the probed insn, sending SIGILL.");
                force_sig(SIGILL);
@@ -2351,6 +2353,8 @@ int uprobe_post_sstep_notifier(struct pt_regs *regs)
                /* task is currently not uprobed */
                return 0;

+       if (utask->state == UTASK_SSTEP_DENY_SIGNAL)
+               set_thread_flag(TIF_SIGPENDING);
        utask->state = UTASK_SSTEP_ACK;
        set_thread_flag(TIF_UPROBE);
        return 1;

------------------>%------------------

> 
> Oleg.
> 
>
Oleg Nesterov Aug. 6, 2024, 5:25 p.m. UTC | #5
On 08/06, Liao, Chang wrote:
>
> You're absolutely right. handle_signlestep() has chance to handle _DENY_SIGANL
> unless it followed by setting TIF_UPROBE in uprobe_deny_signal(). This means
> _DENY_SIGNAL is likey replaced during next uprobe single-stepping.
>
> I believe introducing _DENY_SIGNAL as the immediate state between UTASK_SSTEP
> and UTASK_SSTEP_ACK is still necessary. This allow uprobe_post_sstep_notifier()
> to correctly restore TIF_SIGPENDING upon the completion of single-step.
>
> A revised implementation would look like this:

Still looks "obviously wrong" to me... even the approach itself.

Perhaps I am wrong, yet another day when I can't even read emails on lkml
carefully, sorry.

But can you please send the patch which I could actually apply? This one
looks white-space damaged...

I'll try to reply with more details as soon I convince myself I fully
understand what does your patch actually do, but most probably not tomorrow.

Thanks,

Oleg.

> ------------------%<------------------
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1980,6 +1980,7 @@ bool uprobe_deny_signal(void)
> 
>         if (task_sigpending(t)) {
>                 clear_tsk_thread_flag(t, TIF_SIGPENDING);
> +               utask->state = UTASK_SSTEP_DENY_SIGNAL;
> 
>                 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
>                         utask->state = UTASK_SSTEP_TRAPPED;
> @@ -2276,22 +2277,23 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
>         int err = 0;
> 
>         uprobe = utask->active_uprobe;
> -       if (utask->state == UTASK_SSTEP_ACK)
> +       switch (utask->state) {
> +       case UTASK_SSTEP_ACK:
>                 err = arch_uprobe_post_xol(&uprobe->arch, regs);
> -       else if (utask->state == UTASK_SSTEP_TRAPPED)
> +               break;
> +       case UTASK_SSTEP_TRAPPED:
>                 arch_uprobe_abort_xol(&uprobe->arch, regs);
> -       else
> +               set_thread_flag(TIF_SIGPENDING);
> +               break;
> +       default:
>                 WARN_ON_ONCE(1);
> +       }
> 
>         put_uprobe(uprobe);
>         utask->active_uprobe = NULL;
>         utask->state = UTASK_RUNNING;
>         xol_free_insn_slot(current);
> 
> -       spin_lock_irq(&current->sighand->siglock);
> -       recalc_sigpending(); /* see uprobe_deny_signal() */
> -       spin_unlock_irq(&current->sighand->siglock);
> -
>         if (unlikely(err)) {
>                 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
>                 force_sig(SIGILL);
> @@ -2351,6 +2353,8 @@ int uprobe_post_sstep_notifier(struct pt_regs *regs)
>                 /* task is currently not uprobed */
>                 return 0;
> 
> +       if (utask->state == UTASK_SSTEP_DENY_SIGNAL)
> +               set_thread_flag(TIF_SIGPENDING);
>         utask->state = UTASK_SSTEP_ACK;
>         set_thread_flag(TIF_UPROBE);
>         return 1;
> 
> ------------------>%------------------
> 
> > 
> > Oleg.
> > 
> > 
> 
> -- 
> BR
> Liao, Chang
>
Oleg Nesterov Aug. 7, 2024, 10:17 a.m. UTC | #6
So. Liao, I am sorry, but I dislike your patch/approach in any case.

UTASK_SSTEP_DENY_SIGNAL complicates the state machine. And I don't like the fact
that set_thread_flag(TIF_SIGPENDING) is called twice, from handle_singlestep()
and uprobe_post_sstep_notifier(), this complicates the logic even more.

We need a flag, not the new state.

And if I read this patch correctly it is wrong:

	- uprobe_deny_signal() clears TIF_SIGPENDING and sets UTASK_SSTEP_DENY_SIGNAL

	- another signal cames after that and sets TIF_SIGPENDING again

	- in this case the task won't return to user-space and execute the probed
	  insn, exit_to_user_mode_loop() will notice another TIF_SIGPENDING and
	  call arch_do_signal_or_restart()->get_signal() again.

	- get_signal() will call uprobe_deny_signal() again hit

		WARN_ON_ONCE(utask->state != UTASK_SSTEP);


And no, we shouldn't change this check into UTASK_SSTEP || UTASK_SSTEP_DENY_SIGNAL.
Again, the fact that uprobe_deny_signal() cleared TIF_SIGPENDING must not be the
new state.

Oleg.

On 08/06, Oleg Nesterov wrote:
>
> On 08/06, Liao, Chang wrote:
> >
> > You're absolutely right. handle_signlestep() has chance to handle _DENY_SIGANL
> > unless it followed by setting TIF_UPROBE in uprobe_deny_signal(). This means
> > _DENY_SIGNAL is likey replaced during next uprobe single-stepping.
> >
> > I believe introducing _DENY_SIGNAL as the immediate state between UTASK_SSTEP
> > and UTASK_SSTEP_ACK is still necessary. This allow uprobe_post_sstep_notifier()
> > to correctly restore TIF_SIGPENDING upon the completion of single-step.
> >
> > A revised implementation would look like this:
>
> Still looks "obviously wrong" to me... even the approach itself.
>
> Perhaps I am wrong, yet another day when I can't even read emails on lkml
> carefully, sorry.
>
> But can you please send the patch which I could actually apply? This one
> looks white-space damaged...
>
> I'll try to reply with more details as soon I convince myself I fully
> understand what does your patch actually do, but most probably not tomorrow.
>
> Thanks,
>
> Oleg.
>
> > ------------------%<------------------
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -1980,6 +1980,7 @@ bool uprobe_deny_signal(void)
> >
> >         if (task_sigpending(t)) {
> >                 clear_tsk_thread_flag(t, TIF_SIGPENDING);
> > +               utask->state = UTASK_SSTEP_DENY_SIGNAL;
> >
> >                 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
> >                         utask->state = UTASK_SSTEP_TRAPPED;
> > @@ -2276,22 +2277,23 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
> >         int err = 0;
> >
> >         uprobe = utask->active_uprobe;
> > -       if (utask->state == UTASK_SSTEP_ACK)
> > +       switch (utask->state) {
> > +       case UTASK_SSTEP_ACK:
> >                 err = arch_uprobe_post_xol(&uprobe->arch, regs);
> > -       else if (utask->state == UTASK_SSTEP_TRAPPED)
> > +               break;
> > +       case UTASK_SSTEP_TRAPPED:
> >                 arch_uprobe_abort_xol(&uprobe->arch, regs);
> > -       else
> > +               set_thread_flag(TIF_SIGPENDING);
> > +               break;
> > +       default:
> >                 WARN_ON_ONCE(1);
> > +       }
> >
> >         put_uprobe(uprobe);
> >         utask->active_uprobe = NULL;
> >         utask->state = UTASK_RUNNING;
> >         xol_free_insn_slot(current);
> >
> > -       spin_lock_irq(&current->sighand->siglock);
> > -       recalc_sigpending(); /* see uprobe_deny_signal() */
> > -       spin_unlock_irq(&current->sighand->siglock);
> > -
> >         if (unlikely(err)) {
> >                 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
> >                 force_sig(SIGILL);
> > @@ -2351,6 +2353,8 @@ int uprobe_post_sstep_notifier(struct pt_regs *regs)
> >                 /* task is currently not uprobed */
> >                 return 0;
> >
> > +       if (utask->state == UTASK_SSTEP_DENY_SIGNAL)
> > +               set_thread_flag(TIF_SIGPENDING);
> >         utask->state = UTASK_SSTEP_ACK;
> >         set_thread_flag(TIF_UPROBE);
> >         return 1;
> >
> > ------------------>%------------------
> >
> > >
> > > Oleg.
> > >
> > >
> >
> > --
> > BR
> > Liao, Chang
> >
Liao, Chang Aug. 8, 2024, 7:30 a.m. UTC | #7
在 2024/8/7 18:17, Oleg Nesterov 写道:
> So. Liao, I am sorry, but I dislike your patch/approach in any case.
> 
> UTASK_SSTEP_DENY_SIGNAL complicates the state machine. And I don't like the fact
> that set_thread_flag(TIF_SIGPENDING) is called twice, from handle_singlestep()
> and uprobe_post_sstep_notifier(), this complicates the logic even more.
> 
> We need a flag, not the new state.
> 
> And if I read this patch correctly it is wrong:
> 
> 	- uprobe_deny_signal() clears TIF_SIGPENDING and sets UTASK_SSTEP_DENY_SIGNAL
> 
> 	- another signal cames after that and sets TIF_SIGPENDING again
> 
> 	- in this case the task won't return to user-space and execute the probed
> 	  insn, exit_to_user_mode_loop() will notice another TIF_SIGPENDING and
> 	  call arch_do_signal_or_restart()->get_signal() again.
> 
> 	- get_signal() will call uprobe_deny_signal() again hit
> 
> 		WARN_ON_ONCE(utask->state != UTASK_SSTEP);
> 
> 
> And no, we shouldn't change this check into UTASK_SSTEP || UTASK_SSTEP_DENY_SIGNAL.
> Again, the fact that uprobe_deny_signal() cleared TIF_SIGPENDING must not be the
> new state.

Oleg, If i understand correctly, current state machine expects the single-step handling
should end up with either _ACK or _TRAPPED. Any new state would disrupt this logic. If so,
I'm convinced that adding a new state is uncessary. As you mentioned, I propose using a
boolean flag in the uprobe_task data to track whether a signal should be restored at the
cost of increased size. Here's outline of the changes:

  - pre_ssout() resets the deny signal flag

  - uprobe_deny_signal() sets the deny signal flag when TIF_SIGPENDING is cleared.

  - handle_singlestep() check the deny signal flag and restore TIF_SIGPENDING if necessary.

Does this approach look correct to you,do do you have any other way to implement the "flag"?

Thanks.

> 
> Oleg.
> 
> On 08/06, Oleg Nesterov wrote:
>>
>> On 08/06, Liao, Chang wrote:
>>>
>>> You're absolutely right. handle_signlestep() has chance to handle _DENY_SIGANL
>>> unless it followed by setting TIF_UPROBE in uprobe_deny_signal(). This means
>>> _DENY_SIGNAL is likey replaced during next uprobe single-stepping.
>>>
>>> I believe introducing _DENY_SIGNAL as the immediate state between UTASK_SSTEP
>>> and UTASK_SSTEP_ACK is still necessary. This allow uprobe_post_sstep_notifier()
>>> to correctly restore TIF_SIGPENDING upon the completion of single-step.
>>>
>>> A revised implementation would look like this:
>>
>> Still looks "obviously wrong" to me... even the approach itself.
>>
>> Perhaps I am wrong, yet another day when I can't even read emails on lkml
>> carefully, sorry.
>>
>> But can you please send the patch which I could actually apply? This one
>> looks white-space damaged...
>>
>> I'll try to reply with more details as soon I convince myself I fully
>> understand what does your patch actually do, but most probably not tomorrow.
>>
>> Thanks,
>>
>> Oleg.
>>
>>> ------------------%<------------------
>>> --- a/kernel/events/uprobes.c
>>> +++ b/kernel/events/uprobes.c
>>> @@ -1980,6 +1980,7 @@ bool uprobe_deny_signal(void)
>>>
>>>         if (task_sigpending(t)) {
>>>                 clear_tsk_thread_flag(t, TIF_SIGPENDING);
>>> +               utask->state = UTASK_SSTEP_DENY_SIGNAL;
>>>
>>>                 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
>>>                         utask->state = UTASK_SSTEP_TRAPPED;
>>> @@ -2276,22 +2277,23 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
>>>         int err = 0;
>>>
>>>         uprobe = utask->active_uprobe;
>>> -       if (utask->state == UTASK_SSTEP_ACK)
>>> +       switch (utask->state) {
>>> +       case UTASK_SSTEP_ACK:
>>>                 err = arch_uprobe_post_xol(&uprobe->arch, regs);
>>> -       else if (utask->state == UTASK_SSTEP_TRAPPED)
>>> +               break;
>>> +       case UTASK_SSTEP_TRAPPED:
>>>                 arch_uprobe_abort_xol(&uprobe->arch, regs);
>>> -       else
>>> +               set_thread_flag(TIF_SIGPENDING);
>>> +               break;
>>> +       default:
>>>                 WARN_ON_ONCE(1);
>>> +       }
>>>
>>>         put_uprobe(uprobe);
>>>         utask->active_uprobe = NULL;
>>>         utask->state = UTASK_RUNNING;
>>>         xol_free_insn_slot(current);
>>>
>>> -       spin_lock_irq(&current->sighand->siglock);
>>> -       recalc_sigpending(); /* see uprobe_deny_signal() */
>>> -       spin_unlock_irq(&current->sighand->siglock);
>>> -
>>>         if (unlikely(err)) {
>>>                 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
>>>                 force_sig(SIGILL);
>>> @@ -2351,6 +2353,8 @@ int uprobe_post_sstep_notifier(struct pt_regs *regs)
>>>                 /* task is currently not uprobed */
>>>                 return 0;
>>>
>>> +       if (utask->state == UTASK_SSTEP_DENY_SIGNAL)
>>> +               set_thread_flag(TIF_SIGPENDING);
>>>         utask->state = UTASK_SSTEP_ACK;
>>>         set_thread_flag(TIF_UPROBE);
>>>         return 1;
>>>
>>> ------------------>%------------------
>>>
>>>>
>>>> Oleg.
>>>>
>>>>
>>>
>>> --
>>> BR
>>> Liao, Chang
>>>
> 
>
Oleg Nesterov Aug. 8, 2024, 10:28 a.m. UTC | #8
On 08/08, Liao, Chang wrote:
>
>   - pre_ssout() resets the deny signal flag
>
>   - uprobe_deny_signal() sets the deny signal flag when TIF_SIGPENDING is cleared.
>
>   - handle_singlestep() check the deny signal flag and restore TIF_SIGPENDING if necessary.
>
> Does this approach look correct to you,do do you have any other way to implement the "flag"?

Yes. But I don't think pre_ssout() needs to clear this flag. handle_singlestep() resets/clears
state, active_uprobe, frees insn slot. So I guess we only need


--- x/kernel/events/uprobes.c
+++ x/kernel/events/uprobes.c
@@ -2308,9 +2308,10 @@ static void handle_singlestep(struct upr
 	utask->state = UTASK_RUNNING;
 	xol_free_insn_slot(current);
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* see uprobe_deny_signal() */
-	spin_unlock_irq(&current->sighand->siglock);
+	if (utask->xxx) {
+		set_thread_flag(TIF_SIGPENDING);
+		utask->xxx = 0;
+	}
 
 	if (unlikely(err)) {
 		uprobe_warn(current, "execute the probed insn, sending SIGILL.");

and that is all.

Oleg.
Liao, Chang Aug. 8, 2024, 12:31 p.m. UTC | #9
在 2024/8/8 18:28, Oleg Nesterov 写道:
> On 08/08, Liao, Chang wrote:
>>
>>   - pre_ssout() resets the deny signal flag
>>
>>   - uprobe_deny_signal() sets the deny signal flag when TIF_SIGPENDING is cleared.
>>
>>   - handle_singlestep() check the deny signal flag and restore TIF_SIGPENDING if necessary.
>>
>> Does this approach look correct to you,do do you have any other way to implement the "flag"?
> 
> Yes. But I don't think pre_ssout() needs to clear this flag. handle_singlestep() resets/clears
> state, active_uprobe, frees insn slot. So I guess we only need
> 
> 
> --- x/kernel/events/uprobes.c
> +++ x/kernel/events/uprobes.c
> @@ -2308,9 +2308,10 @@ static void handle_singlestep(struct upr
>  	utask->state = UTASK_RUNNING;
>  	xol_free_insn_slot(current);
>  
> -	spin_lock_irq(&current->sighand->siglock);
> -	recalc_sigpending(); /* see uprobe_deny_signal() */
> -	spin_unlock_irq(&current->sighand->siglock);
> +	if (utask->xxx) {
> +		set_thread_flag(TIF_SIGPENDING);
> +		utask->xxx = 0;
> +	}

Agree, if no more discussion about this flag, I will just send v2 today.

Thanks.

>  
>  	if (unlikely(err)) {
>  		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
> 
> and that is all.
> 
> Oleg.
> 
>
Oleg Nesterov Aug. 8, 2024, 1:17 p.m. UTC | #10
On 08/08, Liao, Chang wrote:
>
>
> 在 2024/8/8 18:28, Oleg Nesterov 写道:
> > --- x/kernel/events/uprobes.c
> > +++ x/kernel/events/uprobes.c
> > @@ -2308,9 +2308,10 @@ static void handle_singlestep(struct upr
> >  	utask->state = UTASK_RUNNING;
> >  	xol_free_insn_slot(current);
> >
> > -	spin_lock_irq(&current->sighand->siglock);
> > -	recalc_sigpending(); /* see uprobe_deny_signal() */
> > -	spin_unlock_irq(&current->sighand->siglock);
> > +	if (utask->xxx) {
> > +		set_thread_flag(TIF_SIGPENDING);
> > +		utask->xxx = 0;
> > +	}
>
> Agree, if no more discussion about this flag, I will just send v2 today.

Please also resend the previous patch a 1/2, this one as 2/2.

Oleg.
diff mbox series

Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index b503fafb7fb3..50acbf96bccd 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -53,6 +53,7 @@  enum uprobe_task_state {
 	UTASK_SSTEP,
 	UTASK_SSTEP_ACK,
 	UTASK_SSTEP_TRAPPED,
+	UTASK_SSTEP_DENY_SIGNAL,
 };
 
 /*
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 76a51a1f51e2..4f9c10b3c7b9 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1980,6 +1980,7 @@  bool uprobe_deny_signal(void)
 
 	if (task_sigpending(t)) {
 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
+		utask->state = UTASK_SSTEP_DENY_SIGNAL;
 
 		if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
 			utask->state = UTASK_SSTEP_TRAPPED;
@@ -2276,22 +2277,25 @@  static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	int err = 0;
 
 	uprobe = utask->active_uprobe;
-	if (utask->state == UTASK_SSTEP_ACK)
+	switch (utask->state) {
+	case UTASK_SSTEP_ACK:
 		err = arch_uprobe_post_xol(&uprobe->arch, regs);
-	else if (utask->state == UTASK_SSTEP_TRAPPED)
+		break;
+	case UTASK_SSTEP_TRAPPED:
 		arch_uprobe_abort_xol(&uprobe->arch, regs);
-	else
+		fallthrough;
+	case UTASK_SSTEP_DENY_SIGNAL:
+		set_tsk_thread_flag(current, TIF_SIGPENDING);
+		break;
+	default:
 		WARN_ON_ONCE(1);
+	}
 
 	put_uprobe(uprobe);
 	utask->active_uprobe = NULL;
 	utask->state = UTASK_RUNNING;
 	xol_free_insn_slot(current);
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* see uprobe_deny_signal() */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	if (unlikely(err)) {
 		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
 		force_sig(SIGILL);