
riscv: Fixup boot failure when CONFIG_RT_MUTEXES=y

Message ID 20241130115355.3316160-1-guoren@kernel.org (mailing list archive)
State New
Headers show
Series riscv: Fixup boot failure when CONFIG_RT_MUTEXES=y | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 215.99s
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1407.84s
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 1600.42s
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 78.74s
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 80.99s
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh took 0.49s
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 49.26s
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.00s
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.64s
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh took 0.01s
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.02s
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.03s

Commit Message

Guo Ren Nov. 30, 2024, 11:53 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

When CONFIG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire would
change from rt_mutex_cmpxchg_acquire to rt_mutex_slowtrylock():
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowtrylock(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
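
For reference, here is a simplified paraphrase (not the exact upstream code) of
why the trylock lands on that raw spinlock: when the cmpxchg fast path is
compiled out (as under CONFIG_DEBUG_RT_MUTEXES), rt_mutex_try_acquire() drops
into the slow path, which takes lock->wait_lock:

	/* Illustrative paraphrase of kernel/locking/rtmutex.c, not verbatim. */
	#ifdef CONFIG_DEBUG_RT_MUTEXES
	static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
							      struct task_struct *old,
							      struct task_struct *new)
	{
		return false;	/* debug builds always force the slow path */
	}
	#endif

	static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
	{
		unsigned long flags;
		int ret;

		/* This lock/unlock pair hits the half-patched spinlock ops below. */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
		ret = __rt_mutex_slowtrylock(lock);
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

		return ret;
	}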

Because the queued_spin_#ops are switched to ticket_#ops one at a time via
jump_label, raw_spin_lock/unlock can deadlock while the switch is still in
progress.
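
The lock and unlock sides can disagree because each arch_spin_* helper has its
own static-branch site. A rough sketch of the combo-spinlock dispatch (names
approximate, not the exact code from arch/riscv/include/asm/spinlock.h):

	/* Sketch only: one static key selects qspinlock vs. ticket lock. */
	DECLARE_STATIC_KEY_TRUE(qspinlock_key);

	#define SPINLOCK_BASE_DECLARE(op, type, type_lock)		\
	static __always_inline type arch_spin_##op(type_lock lock)	\
	{								\
		if (static_branch_likely(&qspinlock_key))		\
			return queued_spin_##op(lock);			\
		return ticket_spin_##op(lock);				\
	}

	SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
	SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)

	/*
	 * Every expansion above is a separate jump_label site.  While the
	 * sites are flipped one by one, arch_spin_lock() may already take the
	 * ticket path while arch_spin_unlock() still takes the queued path,
	 * corrupting the lock word.
	 */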

Concretely, in arch/riscv/kernel/jump_label.c:
1.
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock  -> queued_spin_lock
			 |-> raw_spin_unlock -> queued_spin_unlock
patch_insn_write -> change the raw_spin_lock to ticket_lock
mutex_unlock(&text_mutex);
...

2. /* Dirty the lock value */
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
                         |-> raw_spin_unlock -> *queued_spin_unlock*
			  /* BUG: ticket_lock with queued_spin_unlock */
patch_insn_write  ->  change the raw_spin_unlock to ticket_unlock
mutex_unlock(&text_mutex);
...

3. /* Deadlock */
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
                         |-> raw_spin_unlock -> ticket_unlock
patch_insn_write -> change other raw_spin_#op -> ticket_#op
mutex_unlock(&text_mutex);

So the fix is to avoid taking text_mutex in
arch_jump_label_transform_queue() while early_boot_irqs_disabled is set,
just as is already done for the stop_machine() patching path.
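
Setting riscv_patch_in_stop_machine around the early-boot call keeps the
text-patching code from insisting that text_mutex is held; the check looks
roughly like this (a paraphrase of arch/riscv/kernel/patch.c, not verbatim):

	/* Only demand text_mutex outside the stop_machine/early-boot paths. */
	if (!riscv_patch_in_stop_machine)
		lockdep_assert_held(&text_mutex);

Skipping the mutex is safe at this point because early_boot_irqs_disabled is
only true while the boot CPU is still running with interrupts off, so nothing
else can race the patching.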

Reported-by: Conor Dooley <conor@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Fixes: ab83647fadae ("riscv: Add qspinlock support")
Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/kernel/jump_label.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

Comments

Guo Ren Nov. 30, 2024, 12:02 p.m. UTC | #1
On Sat, Nov 30, 2024 at 7:54 PM <guoren@kernel.org> wrote:
>
> From: Guo Ren <guoren@linux.alibaba.com>
>
> When CONFIG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire would
Correct: CONFIG_DEBUG_RT_MUTEXES=y

Guo Ren Nov. 30, 2024, 3:36 p.m. UTC | #2
On Sat, Nov 30, 2024 at 8:02 PM Guo Ren <guoren@kernel.org> wrote:
>
> On Sat, Nov 30, 2024 at 7:54 PM <guoren@kernel.org> wrote:
> >
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > When CONFIG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire would
> Correct: CONFIG_DEBUG_RT_MUTEXES=y
Abandoned.

Here is the RESEND PATCH:
https://lore.kernel.org/linux-riscv/20241130153310.3349484-1-guoren@kernel.org/


Patch

diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
index 6eee6f736f68..654ed159c830 100644
--- a/arch/riscv/kernel/jump_label.c
+++ b/arch/riscv/kernel/jump_label.c
@@ -36,9 +36,15 @@  bool arch_jump_label_transform_queue(struct jump_entry *entry,
 		insn = RISCV_INSN_NOP;
 	}
 
-	mutex_lock(&text_mutex);
-	patch_insn_write(addr, &insn, sizeof(insn));
-	mutex_unlock(&text_mutex);
+	if (early_boot_irqs_disabled) {
+		riscv_patch_in_stop_machine = 1;
+		patch_insn_write(addr, &insn, sizeof(insn));
+		riscv_patch_in_stop_machine = 0;
+	} else {
+		mutex_lock(&text_mutex);
+		patch_insn_write(addr, &insn, sizeof(insn));
+		mutex_unlock(&text_mutex);
+	}
 
 	return true;
 }