diff mbox series

[-next,V17,4/7] riscv: entry: Convert to generic entry

Message ID 20230222033021.983168-5-guoren@kernel.org (mailing list archive)
State Accepted
Commit f0bddf50586da81360627a772be0e355b62f071e
Headers show
Series riscv: Add GENERIC_ENTRY support | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 13 and now 13
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig fail Errors and warnings before: 2471 this patch: 2473
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig fail Errors and warnings before: 17333 this patch: 17334
conchuod/alphanumeric_selects warning Out of order selects before the patch: 729 and now 730
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 2 this patch: 2
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Blank lines aren't necessary before a close brace '}' WARNING: ENOSYS means 'invalid syscall nr' and nothing else WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
conchuod/source_inline success Was 0 now: 0
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Guo Ren Feb. 22, 2023, 3:30 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

This patch converts riscv to use the generic entry infrastructure from
kernel/entry/*. The generic entry makes maintainers' work easier and
the code more elegant. Here are the changes:

 - Clearer entry.S with handle_exception and ret_from_exception
 - Get rid of complex custom signal implementation
 - Move syscall procedure from assembly to C, which is much more
   readable.
 - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
 - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
 - Use the standard preemption code instead of a custom implementation

Suggested-by: Huacai Chen <chenhuacai@kernel.org>
Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Yipeng Zou <zouyipeng@huawei.com>
Tested-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
---
 arch/riscv/Kconfig                      |   1 +
 arch/riscv/include/asm/asm-prototypes.h |   2 +
 arch/riscv/include/asm/csr.h            |   1 -
 arch/riscv/include/asm/entry-common.h   |  11 ++
 arch/riscv/include/asm/ptrace.h         |  10 +-
 arch/riscv/include/asm/stacktrace.h     |   5 +
 arch/riscv/include/asm/syscall.h        |  21 ++
 arch/riscv/include/asm/thread_info.h    |  13 +-
 arch/riscv/kernel/entry.S               | 242 ++++--------------------
 arch/riscv/kernel/head.h                |   1 -
 arch/riscv/kernel/ptrace.c              |  43 -----
 arch/riscv/kernel/signal.c              |  29 +--
 arch/riscv/kernel/traps.c               | 140 ++++++++++++--
 arch/riscv/mm/fault.c                   |   6 +-
 14 files changed, 210 insertions(+), 315 deletions(-)
 create mode 100644 arch/riscv/include/asm/entry-common.h

Comments

Conor Dooley March 31, 2023, 6:34 p.m. UTC | #1
On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
> 
> This patch converts riscv to use the generic entry infrastructure from
> kernel/entry/*. The generic entry makes maintainers' work easier and
> codes more elegant. Here are the changes:
> 
>  - More clear entry.S with handle_exception and ret_from_exception
>  - Get rid of complex custom signal implementation
>  - Move syscall procedure from assembly to C, which is much more
>    readable.
>  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
>  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
>  - Use the standard preemption code instead of custom

This has unfortunately broken booting my usual NFS rootfs on both my D1
and Icicle. It's one of the Fedora images from David, I think this one:
http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/

It gets pretty far into things, it's once systemd is operational that
things go pear shaped:

[  OK  ] Mounted Huge Pages File System.
[   70.297439] systemd[1]: Mounted POSIX Message Queue File System.
[  OK  ] Mounted POSIX Message Queue File System.
[   70.453489] systemd[1]: Mounted Kernel Debug File System.
[  OK  ] Mounted Kernel Debug File System.
[   70.516331] systemd[1]: Mounted Kernel Trace File System.
[  OK  ] Mounted Kernel Trace File System.
[   70.679253] systemd[1]: modprobe@configfs.service: Succeeded.
[   70.788400] systemd[1]: Finished Load Kernel Module configfs.
[  OK  ] Finished Load Kernel Module configfs.
[   71.501222] systemd[1]: modprobe@drm.service: Succeeded.
[   71.573295] systemd[1]: Finished Load Kernel Module drm.
[  OK  ] Finished Load Kernel Module drm.
[   71.825934] systemd[1]: modprobe@fuse.service: Succeeded.
[   71.886945] systemd[1]: Finished Load Kernel Module fuse.
[  OK  ] Finished Load Kernel Module fuse.
[   71.991932] systemd[1]: nfs-convert.service: Succeeded.
[   72.034674] systemd[1]: Finished Preprocess NFS configuration convertion.
[  OK  ] Finished Preprocess NFS configuration convertion.
[   72.148778] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
[   72.256659] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
[   72.337818] systemd[1]: Failed to start Load Kernel Modules.
[FAILED] Failed to start Load Kernel Modules.
See 'systemctl status systemd-modules-load.service' for details.
[   72.410491] systemd[1]: systemd-modules-load.service: Consumed 1.463s CPU time.
[   72.496739] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
[   72.513689] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
[   72.682549] systemd[1]: Starting Apply Kernel Variables..
[  OK  ] Finished Apply Kernel Variables.
[   76.314434] systemd[1]: Finished Load/Save Random Seed.
[  OK  ] Finished Load/Save Random Seed.
[***   ] (1 of 6) A start job is running for…p Virtual Console (14s / no limit)
[  OK  ] Finished Create Static Device Nodes in /dev.
[   79.787065] systemd[1]: Started Entropy Daemon based on the HAVEGE algorithm.
[  OK  ] Started Entropy Daemon based on the HAVEGE algorithm.
[   80.186295] systemd[1]: Starting Journal Service...
         Starting Journal Service...
[   80.713508] systemd[1]: Starting Rule-based Manager for Device Events and Files...
         Starting Rule-based Manage…for Device Events and Files...
[  *** ] (2 of 7) A start job is running for… All udev Devices (17s / no limit)
[   82.939347] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
[   83.032046] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
[FAILED] Failed to start Journal Service.
See 'systemctl status systemd-journald.service' for details.
[   83.210041] systemd[1]: Dependency failed for Flush Journal to Persistent Storage.
[DEPEND] Dependency failed for Flus…Journal to Persistent Storage.
[   83.254122] systemd[1]: systemd-journal-flush.service: Job systemd-journal-flush.service/start failed with result 'dependency'.
[   83.272366] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
[   83.334360] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1.
[   83.427839] systemd[1]: Finished Setup Virtual Console.
[  OK  ] Finished Setup Virtual Console.
[   83.510650] systemd[1]: Stopped Journal Service.
[  OK  ] Stopped Journal Service.
[   83.554417] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
[   83.576573] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
[   83.904878] systemd[1]: Starting Journal Service...
         Starting Journal Service...
[   85.752090] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
[   85.826421] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
[   85.876165] systemd[1]: Failed to start Journal Service.
[FAILED] Failed to start Journal Service.
See 'systemctl status systemd-journald.service' for details.
[   85.952221] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
[   86.002092] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 2.
[   86.015081] systemd[1]: Stopped Journal Service.
[  OK  ] Stopped Journal Service.
[   86.076429] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
[   86.089700] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
[   86.390162] systemd[1]: Starting Journal Service...
         Starting Journal Service...
[   87.904427] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
[   87.950259] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
[   88.000661] systemd[1]: Failed to start Journal Service.
[FAILED] Failed to start Journal Service.
See 'systemctl status systemd-journald.service' for details.
[   88.079953] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
[   88.128956] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 3.
[   88.145365] systemd[1]: Stopped Journal Service.
[  OK  ] Stopped Journal Service.
[   88.189975] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
[   88.205799] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
[   88.514817] systemd[1]: Starting Journal Service...
         Starting Journal Service...

(Note, you need to merge -rc2 into riscv/for-next to actually boot)

Cheers,
Conor.
Conor Dooley March 31, 2023, 6:41 p.m. UTC | #2
On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
> On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> > 
> > This patch converts riscv to use the generic entry infrastructure from
> > kernel/entry/*. The generic entry makes maintainers' work easier and
> > codes more elegant. Here are the changes:
> > 
> >  - More clear entry.S with handle_exception and ret_from_exception
> >  - Get rid of complex custom signal implementation
> >  - Move syscall procedure from assembly to C, which is much more
> >    readable.
> >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> >  - Use the standard preemption code instead of custom
> 
> This has unfortunately broken booting my usual NFS rootfs on both my D1
> and Icicle. It's one of the Fedora images from David, I think this one:
> http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> 
> It gets pretty far into things, it's once systemd is operational that
> things go pear shaped:

I should have said: I can share the full logs if required, of course, but
they're quite verbose because of systemd etc.

> 
> [  OK  ] Mounted Huge Pages File System.
> [   70.297439] systemd[1]: Mounted POSIX Message Queue File System.
> [  OK  ] Mounted POSIX Message Queue File System.
> [   70.453489] systemd[1]: Mounted Kernel Debug File System.
> [  OK  ] Mounted Kernel Debug File System.
> [   70.516331] systemd[1]: Mounted Kernel Trace File System.
> [  OK  ] Mounted Kernel Trace File System.
> [   70.679253] systemd[1]: modprobe@configfs.service: Succeeded.
> [   70.788400] systemd[1]: Finished Load Kernel Module configfs.
> [  OK  ] Finished Load Kernel Module configfs.
> [   71.501222] systemd[1]: modprobe@drm.service: Succeeded.
> [   71.573295] systemd[1]: Finished Load Kernel Module drm.
> [  OK  ] Finished Load Kernel Module drm.
> [   71.825934] systemd[1]: modprobe@fuse.service: Succeeded.
> [   71.886945] systemd[1]: Finished Load Kernel Module fuse.
> [  OK  ] Finished Load Kernel Module fuse.
> [   71.991932] systemd[1]: nfs-convert.service: Succeeded.
> [   72.034674] systemd[1]: Finished Preprocess NFS configuration convertion.
> [  OK  ] Finished Preprocess NFS configuration convertion.
> [   72.148778] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
> [   72.256659] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
> [   72.337818] systemd[1]: Failed to start Load Kernel Modules.
> [FAILED] Failed to start Load Kernel Modules.
> See 'systemctl status systemd-modules-load.service' for details.
> [   72.410491] systemd[1]: systemd-modules-load.service: Consumed 1.463s CPU time.
> [   72.496739] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
> [   72.513689] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
> [   72.682549] systemd[1]: Starting Apply Kernel Variables..
> [  OK  ] Finished Apply Kernel Variables.
> [   76.314434] systemd[1]: Finished Load/Save Random Seed.
> [  OK  ] Finished Load/Save Random Seed.
> [***   ] (1 of 6) A start job is running for…p Virtual Console (14s / no limit)
> [  OK  ] Finished Create Static Device Nodes in /dev.
> [   79.787065] systemd[1]: Started Entropy Daemon based on the HAVEGE algorithm.
> [  OK  ] Started Entropy Daemon based on the HAVEGE algorithm.
> [   80.186295] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   80.713508] systemd[1]: Starting Rule-based Manager for Device Events and Files...
>          Starting Rule-based Manage…for Device Events and Files...
> [  *** ] (2 of 7) A start job is running for… All udev Devices (17s / no limit)
> [   82.939347] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   83.032046] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   83.210041] systemd[1]: Dependency failed for Flush Journal to Persistent Storage.
> [DEPEND] Dependency failed for Flus…Journal to Persistent Storage.
> [   83.254122] systemd[1]: systemd-journal-flush.service: Job systemd-journal-flush.service/start failed with result 'dependency'.
> [   83.272366] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> [   83.334360] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1.
> [   83.427839] systemd[1]: Finished Setup Virtual Console.
> [  OK  ] Finished Setup Virtual Console.
> [   83.510650] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   83.554417] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> [   83.576573] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   83.904878] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   85.752090] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   85.826421] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [   85.876165] systemd[1]: Failed to start Journal Service.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   85.952221] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> [   86.002092] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 2.
> [   86.015081] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   86.076429] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> [   86.089700] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   86.390162] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   87.904427] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   87.950259] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [   88.000661] systemd[1]: Failed to start Journal Service.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   88.079953] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> [   88.128956] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 3.
> [   88.145365] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   88.189975] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> [   88.205799] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   88.514817] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> 
> (Note, you need to merge -rc2 into riscv/for-next to actually boot)
> 
> Cheers,
> Conor.
Heiko Stübner March 31, 2023, 6:46 p.m. UTC | #3
Hi,

Am Freitag, 31. März 2023, 20:41:35 CEST schrieb Conor Dooley:
> On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
> > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > From: Guo Ren <guoren@linux.alibaba.com>
> > > 
> > > This patch converts riscv to use the generic entry infrastructure from
> > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > codes more elegant. Here are the changes:
> > > 
> > >  - More clear entry.S with handle_exception and ret_from_exception
> > >  - Get rid of complex custom signal implementation
> > >  - Move syscall procedure from assembly to C, which is much more
> > >    readable.
> > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > >  - Use the standard preemption code instead of custom
> > 
> > This has unfortunately broken booting my usual NFS rootfs on both my D1
> > and Icicle. It's one of the Fedora images from David, I think this one:
> > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> > 
> > It gets pretty far into things, it's once systemd is operational that
> > things go pear shaped:
> 
> Shoulda said, can share the full logs if required of course, but they're
> quite verbose cos systemd etc.

I was investigating the same thing just now, so that saves me some time
tracking down the culprit :-) .

Both my main qemu, which lives as a "board" in my boardfarm (also doing
nfsroot), and my D1 Nezha with nfsroot were affected.

My board, though, is stuck in a failure loop, with both the journal and
the timesyncd services failing again and again, and I haven't yet
figured out how to get logs without a working login console.


Heiko


> 
> > 
> > [  OK  ] Mounted Huge Pages File System.
> > [   70.297439] systemd[1]: Mounted POSIX Message Queue File System.
> > [  OK  ] Mounted POSIX Message Queue File System.
> > [   70.453489] systemd[1]: Mounted Kernel Debug File System.
> > [  OK  ] Mounted Kernel Debug File System.
> > [   70.516331] systemd[1]: Mounted Kernel Trace File System.
> > [  OK  ] Mounted Kernel Trace File System.
> > [   70.679253] systemd[1]: modprobe@configfs.service: Succeeded.
> > [   70.788400] systemd[1]: Finished Load Kernel Module configfs.
> > [  OK  ] Finished Load Kernel Module configfs.
> > [   71.501222] systemd[1]: modprobe@drm.service: Succeeded.
> > [   71.573295] systemd[1]: Finished Load Kernel Module drm.
> > [  OK  ] Finished Load Kernel Module drm.
> > [   71.825934] systemd[1]: modprobe@fuse.service: Succeeded.
> > [   71.886945] systemd[1]: Finished Load Kernel Module fuse.
> > [  OK  ] Finished Load Kernel Module fuse.
> > [   71.991932] systemd[1]: nfs-convert.service: Succeeded.
> > [   72.034674] systemd[1]: Finished Preprocess NFS configuration convertion.
> > [  OK  ] Finished Preprocess NFS configuration convertion.
> > [   72.148778] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
> > [   72.256659] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
> > [   72.337818] systemd[1]: Failed to start Load Kernel Modules.
> > [FAILED] Failed to start Load Kernel Modules.
> > See 'systemctl status systemd-modules-load.service' for details.
> > [   72.410491] systemd[1]: systemd-modules-load.service: Consumed 1.463s CPU time.
> > [   72.496739] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
> > [   72.513689] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
> > [   72.682549] systemd[1]: Starting Apply Kernel Variables..
> > [  OK  ] Finished Apply Kernel Variables.
> > [   76.314434] systemd[1]: Finished Load/Save Random Seed.
> > [  OK  ] Finished Load/Save Random Seed.
> > [***   ] (1 of 6) A start job is running for…p Virtual Console (14s / no limit)
> > [  OK  ] Finished Create Static Device Nodes in /dev.
> > [   79.787065] systemd[1]: Started Entropy Daemon based on the HAVEGE algorithm.
> > [  OK  ] Started Entropy Daemon based on the HAVEGE algorithm.
> > [   80.186295] systemd[1]: Starting Journal Service...
> >          Starting Journal Service...
> > [   80.713508] systemd[1]: Starting Rule-based Manager for Device Events and Files...
> >          Starting Rule-based Manage…for Device Events and Files...
> > [  *** ] (2 of 7) A start job is running for… All udev Devices (17s / no limit)
> > [   82.939347] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > [   83.032046] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > [FAILED] Failed to start Journal Service.
> > See 'systemctl status systemd-journald.service' for details.
> > [   83.210041] systemd[1]: Dependency failed for Flush Journal to Persistent Storage.
> > [DEPEND] Dependency failed for Flus…Journal to Persistent Storage.
> > [   83.254122] systemd[1]: systemd-journal-flush.service: Job systemd-journal-flush.service/start failed with result 'dependency'.
> > [   83.272366] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> > [   83.334360] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1.
> > [   83.427839] systemd[1]: Finished Setup Virtual Console.
> > [  OK  ] Finished Setup Virtual Console.
> > [   83.510650] systemd[1]: Stopped Journal Service.
> > [  OK  ] Stopped Journal Service.
> > [   83.554417] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> > [   83.576573] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > [   83.904878] systemd[1]: Starting Journal Service...
> >          Starting Journal Service...
> > [   85.752090] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > [   85.826421] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > [   85.876165] systemd[1]: Failed to start Journal Service.
> > [FAILED] Failed to start Journal Service.
> > See 'systemctl status systemd-journald.service' for details.
> > [   85.952221] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> > [   86.002092] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 2.
> > [   86.015081] systemd[1]: Stopped Journal Service.
> > [  OK  ] Stopped Journal Service.
> > [   86.076429] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> > [   86.089700] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > [   86.390162] systemd[1]: Starting Journal Service...
> >          Starting Journal Service...
> > [   87.904427] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > [   87.950259] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > [   88.000661] systemd[1]: Failed to start Journal Service.
> > [FAILED] Failed to start Journal Service.
> > See 'systemctl status systemd-journald.service' for details.
> > [   88.079953] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> > [   88.128956] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 3.
> > [   88.145365] systemd[1]: Stopped Journal Service.
> > [  OK  ] Stopped Journal Service.
> > [   88.189975] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> > [   88.205799] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > [   88.514817] systemd[1]: Starting Journal Service...
> >          Starting Journal Service...
> > 
> > (Note, you need to merge -rc2 into riscv/for-next to actually boot)
> > 
> > Cheers,
> > Conor.
> 
> 
>
Conor Dooley March 31, 2023, 6:55 p.m. UTC | #4
On Fri, Mar 31, 2023 at 08:46:44PM +0200, Heiko Stübner wrote:
> Hi,
> 
> Am Freitag, 31. März 2023, 20:41:35 CEST schrieb Conor Dooley:
> > On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
> > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > 
> > > > This patch converts riscv to use the generic entry infrastructure from
> > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > codes more elegant. Here are the changes:
> > > > 
> > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > >  - Get rid of complex custom signal implementation
> > > >  - Move syscall procedure from assembly to C, which is much more
> > > >    readable.
> > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > >  - Use the standard preemption code instead of custom
> > > 
> > > This has unfortunately broken booting my usual NFS rootfs on both my D1
> > > and Icicle. It's one of the Fedora images from David, I think this one:
> > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> > > 
> > > It gets pretty far into things, it's once systemd is operational that
> > > things go pear shaped:
> > 
> > Shoulda said, can share the full logs if required of course, but they're
> > quite verbose cos systemd etc.
> 
> I was just investigating the same thing just now. So that saves me some
> tracking down the culprit :-) .
> 
> My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
> as well as my d1 nezha with nfsroot was affected.
> 
> Though my board is stuck in some failure loop with both the journal- as
> well as the timesyncd service failing again and again. And I haven't
> figured out how to get logs without a working login console yet.

I'll attach the full output from a run I guess. journald fails ad
infinitum for me too after I cut this log off.

Cheers,
Conor.
[    0.000000] Linux version 6.3.0-rc2-gd5e0396cf8bf-dirty (conor@spud) (ClangBuiltLinux clang version 15.0.7 (/stuff/brsdk/llvm/clang 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a), ClangBuiltLinux LLD 15.0.7) #1 SMP PREEMPT @7
[    0.000000] OF: fdt: Ignoring memory range 0x80000000 - 0x80200000
[    0.000000] Machine model: Microchip PolarFire-SoC Icicle Kit
[    0.000000] earlycon: ns16550a0 at MMIO32 0x0000000020000000 (options '115200n8')
[    0.000000] printk: bootconsole [ns16550a0] enabled
[    0.000000] efi: UEFI not found.
[    0.000000] OF: fdt: Reserved memory: failed to reserve memory for node 'region@BFC00000': base 0x00000000bfc00000, size 4 MiB
[    0.000000] OF: reserved mem: 0x00000000bfc00000..0x00000000bfffffff (4096 KiB) nomap non-reusable region@BFC00000
[    0.000000] Zone ranges:
[    0.000000]   DMA32    [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000]   Normal   empty
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000] Initmem setup node 0 [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000] On node 0, zone DMA32: 512 pages in unavailable ranges
[    0.000000] SBI specification v0.3 detected
[    0.000000] SBI implementation ID=0x1 Version=0x10000
[    0.000000] SBI TIME extension detected
[    0.000000] SBI IPI extension detected
[    0.000000] SBI RFENCE extension detected
[    0.000000] SBI SRST extension detected
[    0.000000] SBI HSM extension detected
[    0.000000] CPU with hartid=0 is not available
[    0.000000] CPU with hartid=0 is not available
[    0.000000] CPU with hartid=0 is not available
[    0.000000] riscv: base ISA extensions acdfim
[    0.000000] riscv: ELF capabilities acdfim
[    0.000000] percpu: Embedded 29 pages/cpu s79648 r8192 d30944 u118784
[    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 258055
[    0.000000] Kernel command line: root=/dev/nfs rw ip=dhcp nfsroot=99.99.99.5:/stuff/nfs_share,tcp,v3 rdinit=/usr/sbin/init rootwait=10 earlycon
[    0.000000] Unknown kernel command line parameters "rootwait=10", will be passed to user space.
[    0.000000] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
[    0.000000] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
[    0.000000] mem auto-init: stack:all(zero), heap alloc:off, heap free:off
[    0.000000] stackdepot: allocating hash table via alloc_large_system_hash
[    0.000000] stackdepot hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
[    0.000000] Virtual kernel memory layout:
[    0.000000]       fixmap : 0xffffffc6fee00000 - 0xffffffc6ff000000   (2048 kB)
[    0.000000]       pci io : 0xffffffc6ff000000 - 0xffffffc700000000   (  16 MB)
[    0.000000]      vmemmap : 0xffffffc700000000 - 0xffffffc800000000   (4096 MB)
[    0.000000]      vmalloc : 0xffffffc800000000 - 0xffffffd800000000   (  64 GB)
[    0.000000]      modules : 0xffffffff0305f000 - 0xffffffff80000000   (1999 MB)
[    0.000000]       lowmem : 0xffffffd800000000 - 0xffffffd83fe00000   (1022 MB)
[    0.000000]        kasan : 0xfffffff700000000 - 0xffffffff00000000   (  32 GB)
[    0.000000]       kernel : 0xffffffff80000000 - 0xffffffffffffffff   (2047 MB)
[    0.000000] Memory: 545616K/1046528K available (16518K kernel code, 8042K rwdata, 8192K rodata, 2303K init, 12559K bss, 500912K reserved, 0K cma-reserved)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.000000] trace event string verifier disabled
[    0.000000] Running RCU self tests
[    0.000000] Running RCU synchronous self tests
[    0.000000] rcu: Preemptible hierarchical RCU implementation.
[    0.000000] rcu: 	RCU lockdep checking is enabled.
[    0.000000] rcu: 	RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=4.
[    0.000000] rcu: 	RCU debug extended QS entry/exit.
[    0.000000] 	Trampoline variant of Tasks RCU enabled.
[    0.000000] 	Tracing variant of Tasks RCU enabled.
[    0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
[    0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
[    0.000000] Running RCU synchronous self tests
[    0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
[    0.000000] CPU with hartid=0 is not available
[    0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller
[    0.000000] riscv-intc: 64 local interrupts mapped
[    0.000000] plic: interrupt-controller@c000000: mapped 186 interrupts with 4 handlers for 9 contexts.
[    0.000000] rcu: srcu_init: Setting srcu_struct sizes based on contention.
[    0.000000] riscv-timer: riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [1]
[    0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x1d854df40, max_idle_ns: 3526361616960 ns
[    0.000006] sched_clock: 64 bits at 1000kHz, resolution 1000ns, wraps every 2199023255500ns
[    0.015372] Console: colour dummy device 80x25
[    0.020991] printk: console [tty0] enabled
[    0.026672] printk: bootconsole [ns16550a0] disabled
[    0.000000] Linux version 6.3.0-rc2-gd5e0396cf8bf-dirty (conor@spud) (ClangBuiltLinux clang version 15.0.7 (/stuff/brsdk/llvm/clang 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a), ClangBuiltLinux LLD 15.0.7) #1 SMP PREEMPT @7
[    0.000000] OF: fdt: Ignoring memory range 0x80000000 - 0x80200000
[    0.000000] Machine model: Microchip PolarFire-SoC Icicle Kit
[    0.000000] earlycon: ns16550a0 at MMIO32 0x0000000020000000 (options '115200n8')
[    0.000000] printk: bootconsole [ns16550a0] enabled
[    0.000000] efi: UEFI not found.
[    0.000000] OF: fdt: Reserved memory: failed to reserve memory for node 'region@BFC00000': base 0x00000000bfc00000, size 4 MiB
[    0.000000] OF: reserved mem: 0x00000000bfc00000..0x00000000bfffffff (4096 KiB) nomap non-reusable region@BFC00000
[    0.000000] Zone ranges:
[    0.000000]   DMA32    [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000]   Normal   empty
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000] Initmem setup node 0 [mem 0x0000000080200000-0x00000000bfffffff]
[    0.000000] On node 0, zone DMA32: 512 pages in unavailable ranges
[    0.000000] SBI specification v0.3 detected
[    0.000000] SBI implementation ID=0x1 Version=0x10000
[    0.000000] SBI TIME extension detected
[    0.000000] SBI IPI extension detected
[    0.000000] SBI RFENCE extension detected
[    0.000000] SBI SRST extension detected
[    0.000000] SBI HSM extension detected
[    0.000000] CPU with hartid=0 is not available
[    0.000000] CPU with hartid=0 is not available
[    0.000000] CPU with hartid=0 is not available
[    0.000000] riscv: base ISA extensions acdfim
[    0.000000] riscv: ELF capabilities acdfim
[    0.000000] percpu: Embedded 29 pages/cpu s79648 r8192 d30944 u118784
[    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 258055
[    0.000000] Kernel command line: root=/dev/nfs rw ip=dhcp nfsroot=99.99.99.5:/stuff/nfs_share,tcp,v3 rdinit=/usr/sbin/init rootwait=10 earlycon
[    0.000000] Unknown kernel command line parameters "rootwait=10", will be passed to user space.
[    0.000000] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
[    0.000000] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
[    0.000000] mem auto-init: stack:all(zero), heap alloc:off, heap free:off
[    0.000000] stackdepot: allocating hash table via alloc_large_system_hash
[    0.000000] stackdepot hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
[    0.000000] Virtual kernel memory layout:
[    0.000000]       fixmap : 0xffffffc6fee00000 - 0xffffffc6ff000000   (2048 kB)
[    0.000000]       pci io : 0xffffffc6ff000000 - 0xffffffc700000000   (  16 MB)
[    0.000000]      vmemmap : 0xffffffc700000000 - 0xffffffc800000000   (4096 MB)
[    0.000000]      vmalloc : 0xffffffc800000000 - 0xffffffd800000000   (  64 GB)
[    0.000000]      modules : 0xffffffff0305f000 - 0xffffffff80000000   (1999 MB)
[    0.000000]       lowmem : 0xffffffd800000000 - 0xffffffd83fe00000   (1022 MB)
[    0.000000]        kasan : 0xfffffff700000000 - 0xffffffff00000000   (  32 GB)
[    0.000000]       kernel : 0xffffffff80000000 - 0xffffffffffffffff   (2047 MB)
[    0.000000] Memory: 545616K/1046528K available (16518K kernel code, 8042K rwdata, 8192K rodata, 2303K init, 12559K bss, 500912K reserved, 0K cma-reserved)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.000000] trace event string verifier disabled
[    0.000000] Running RCU self tests
[    0.000000] Running RCU synchronous self tests
[    0.000000] rcu: Preemptible hierarchical RCU implementation.
[    0.000000] rcu: 	RCU lockdep checking is enabled.
[    0.000000] rcu: 	RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=4.
[    0.000000] rcu: 	RCU debug extended QS entry/exit.
[    0.000000] 	Trampoline variant of Tasks RCU enabled.
[    0.000000] 	Tracing variant of Tasks RCU enabled.
[    0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
[    0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
[    0.000000] Running RCU synchronous self tests
[    0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
[    0.000000] CPU with hartid=0 is not available
[    0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller
[    0.000000] riscv-intc: 64 local interrupts mapped
[    0.000000] plic: interrupt-controller@c000000: mapped 186 interrupts with 4 handlers for 9 contexts.
[    0.000000] rcu: srcu_init: Setting srcu_struct sizes based on contention.
[    0.000000] riscv-timer: riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [1]
[    0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x1d854df40, max_idle_ns: 3526361616960 ns
[    0.000006] sched_clock: 64 bits at 1000kHz, resolution 1000ns, wraps every 2199023255500ns
[    0.015372] Console: colour dummy device 80x25
[    0.020991] printk: console [tty0] enabled
[    0.026672] printk: bootconsole [ns16550a0] disabled
[    0.033749] Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
[    0.035065] ... MAX_LOCKDEP_SUBCLASSES:  8
[    0.035844] ... MAX_LOCK_DEPTH:          48
[    0.036633] ... MAX_LOCKDEP_KEYS:        8192
[    0.037658] ... CLASSHASH_SIZE:          4096
[    0.038478] ... MAX_LOCKDEP_ENTRIES:     32768
[    0.039307] ... MAX_LOCKDEP_CHAINS:      65536
[    0.040135] ... CHAINHASH_SIZE:          32768
[    0.040963]  memory used by lock dependency info: 6365 kB
[    0.042130]  memory used for stack traces: 4224 kB
[    0.043012]  per task-struct memory footprint: 1920 bytes
[    0.044452] Calibrating delay loop (skipped), value calculated using timer frequency.. 2.00 BogoMIPS (lpj=4000)
[    0.046447] pid_max: default: 32768 minimum: 301
[    0.052917] Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
[    0.054369] Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
[    0.084506] Running RCU synchronous self tests
[    0.085686] Running RCU synchronous self tests
[    0.095405] CPU node for /cpus/cpu@0 exist but the possible cpu range is :0-3
[    0.130236] cblist_init_generic: Setting adjustable number of callback queues.
[    0.133050] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.137669] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.142192] Running RCU-tasks wait API self tests
[    0.263638] riscv: ELF compat mode unsupported
[    0.263807] ASID allocator disabled (0 bits)
[    0.270226] Callback from call_rcu_tasks_trace() invoked.
[    0.272903] rcu: Hierarchical SRCU implementation.
[    0.274289] rcu: 	Max phase no-delay instances is 1000.
[    0.302638] EFI services will not be available.
[    0.315637] smp: Bringing up secondary CPUs ...
[    0.396166] smp: Brought up 1 node, 4 CPUs
[    0.422047] devtmpfs: initialized
[    0.490692] Callback from call_rcu_tasks() invoked.
[    0.681118] Running RCU synchronous self tests
[    0.682861] Running RCU synchronous self tests
[    0.691211] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[    0.693981] futex hash table entries: 1024 (order: 5, 131072 bytes, linear)
[    0.702882] pinctrl core: initialized pinctrl subsystem
[    0.744067] NET: Registered PF_NETLINK/PF_ROUTE protocol family
[    0.758314] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations
[    0.761255] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations
[    0.795383] cpuidle: using governor menu
[    1.272427] HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages
[    1.274292] HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page
[    1.411012] SCSI subsystem initialized
[    1.431885] usbcore: registered new interface driver usbfs
[    1.435740] usbcore: registered new interface driver hub
[    1.439351] usbcore: registered new device driver usb
[    1.462261] FPGA manager framework
[    1.512290] vgaarb: loaded
[    1.519890] clocksource: Switched to clocksource riscv_clocksource
[    2.009122] NET: Registered PF_INET protocol family
[    2.016053] IP idents hash table entries: 16384 (order: 5, 131072 bytes, linear)
[    2.052865] tcp_listen_portaddr_hash hash table entries: 512 (order: 3, 36864 bytes, linear)
[    2.056616] Table-perturb hash table entries: 65536 (order: 6, 262144 bytes, linear)
[    2.059500] TCP established hash table entries: 8192 (order: 4, 65536 bytes, linear)
[    2.072933] TCP bind hash table entries: 8192 (order: 8, 1179648 bytes, linear)
[    2.113517] TCP: Hash tables configured (established 8192 bind 8192)
[    2.120319] UDP hash table entries: 512 (order: 4, 81920 bytes, linear)
[    2.125390] UDP-Lite hash table entries: 512 (order: 4, 81920 bytes, linear)
[    2.136344] NET: Registered PF_UNIX/PF_LOCAL protocol family
[    2.161084] RPC: Registered named UNIX socket transport module.
[    2.162843] RPC: Registered udp transport module.
[    2.164402] RPC: Registered tcp transport module.
[    2.165631] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    2.167599] PCI: CLS 0 bytes, default 64
[    2.190148] Unpacking initramfs...
[    2.251957] workingset: timestamp_bits=62 max_order=18 bucket_order=0
[    2.296023] NFS: Registering the id_resolver key type
[    2.299423] Key type id_resolver registered
[    2.300826] Key type id_legacy registered
[    2.303490] nfs4filelayout_init: NFSv4 File Layout Driver Registering...
[    2.305408] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver Registering...
[    2.314304] 9p: Installing v9fs 9p2000 file system support
[    2.330388] NET: Registered PF_ALG protocol family
[    2.335558] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 246)
[    2.337681] io scheduler mq-deadline registered
[    2.338886] io scheduler kyber registered
[    2.341159] io scheduler bfq registered
[   13.013052] String selftests succeeded
[   13.014173] test_string_helpers: Running tests...
[   13.386804] CCACHE: DataError @ 0x00000000.0807FFF8
[   13.391444] CCACHE: DataFail @ 0x00000000.0807FFF0
[   13.397053] CCACHE: 4 banks, 16 ways, sets/bank=512, bytes/block=64
[   13.398602] CCACHE: Index of the largest way enabled: 11
[   16.037879] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
[   16.149343] 20000000.serial: ttyS0 at MMIO 0x20000000 (irq = 15, base_baud = 9375000) is a 16550A
[   16.156089] printk: console [ttyS0] enabled
[   17.265279] 20100000.serial: ttyS1 at MMIO 0x20100000 (irq = 16, base_baud = 9375000) is a 16550A
[   17.327497] 20102000.serial: ttyS2 at MMIO 0x20102000 (irq = 17, base_baud = 9375000) is a 16550A
[   17.389076] 20104000.serial: ttyS3 at MMIO 0x20104000 (irq = 18, base_baud = 9375000) is a 16550A
[   17.426517] of_serial: probe of 20106000.serial failed with error -28
[   17.916595] loop: module loaded
[   17.964831] zram: Added device: zram0
[   18.086264] microchip-corespi 20108000.spi: Registered SPI controller 0
[   18.121095] microchip-corespi 20109000.spi: Registered SPI controller 1
[   18.203919] spi-nor spi3.0: w25q128 (16384 Kbytes)
[   21.169496] Freeing initrd memory: 15668K
[   21.395452] macb 20110000.ethernet eth0: Cadence GEM rev 0x0107010c at 0x20110000 irq 23 (00:04:a3:41:d0:fd)
[   21.412679] e1000e: Intel(R) PRO/1000 Network Driver
[   21.419777] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
[   21.460117] usbcore: registered new interface driver uas
[   21.469638] usbcore: registered new interface driver usb-storage
[   21.524504] musb-hdrc musb-hdrc.1.auto: MUSB HDRC host driver
[   21.540183] musb-hdrc musb-hdrc.1.auto: new USB bus registered, assigned bus number 1
[   21.610837] hub 1-0:1.0: USB hub found
[   21.621142] hub 1-0:1.0: 1 port detected
[   21.680372] mpfs-musb 20201000.usb: Registered MPFS MUSB driver
[   21.702517] mousedev: PS/2 mouse device common for all mice
[   21.724021] i2c_dev: i2c /dev entries driver
[   21.763554] microchip-corei2c 2010a000.i2c: registered CoreI2C bus driver
[   21.797752] microchip-corei2c 2010b000.i2c: registered CoreI2C bus driver
[   21.852615] sdhci: Secure Digital Host Controller Interface driver
[   21.860773] sdhci: Copyright(c) Pierre Ossman
[   21.868727] sdhci-pltfm: SDHCI platform and OF driver helper
[   21.895884] usbcore: registered new interface driver usbhid
[   21.902221] usbhid: USB HID core driver
[   21.936265] mpfs-mailbox 37020000.mailbox: Registered MPFS mailbox controller driver
[   21.966319] riscv-pmu-sbi: SBI PMU extension is available
[   21.973922] riscv-pmu-sbi: 15 firmware and 4 hardware counters
[   21.981727] riscv-pmu-sbi: Perf sampling/filtering is not supported as sscof extension is not available
[   21.996006] mmc0: SDHCI controller on 20008000.mmc [20008000.mmc] using ADMA 64-bit
[   22.032435] NET: Registered PF_INET6 protocol family
[   22.080829] Segment Routing with IPv6
[   22.087604] In-situ OAM (IOAM) with IPv6
[   22.094555] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
[   22.132241] NET: Registered PF_PACKET protocol family
[   22.143837] 9pnet: Installing 9P2000 support
[   22.145445] mmc0: new HS200 MMC card at address 0001
[   22.160350] Key type dns_resolver registered
[   22.198172] mmcblk0: mmc0:0001 TB2916 14.6 GiB 
[   22.359675] mmcblk0boot0: mmc0:0001 TB2916 4.00 MiB 
[   22.472842] mmcblk0boot1: mmc0:0001 TB2916 4.00 MiB 
[   22.574290] mmcblk0rpmb: mmc0:0001 TB2916 4.00 MiB, chardev (242:0)
[   23.619821] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating architecture page table helpers
[   23.910330] mpfs-sys-controller syscontroller: Registered MPFS system controller
[   23.955403] random: crng init done
[   23.961736] mpfs-rng mpfs-rng: Registered MPFS hwrng
[   24.040123] macb 20110000.ethernet eth0: PHY [20110000.ethernet-ffffffff:00] driver [RTL8211F Gigabit Ethernet] (irq=POLL)
[   24.055924] macb 20110000.ethernet eth0: configuring for phy/sgmii link mode
[   28.254478] macb 20110000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off
[   28.266341] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[   28.275892] Sending DHCP requests ., OK
[   28.306175] IP-Config: Got DHCP answer from 99.99.99.1, my address is 99.99.99.97
[   28.316744] IP-Config: Complete:
[   28.321357]      device=eth0, hwaddr=00:04:a3:41:d0:fd, ipaddr=99.99.99.97, mask=255.255.255.0, gw=99.99.99.1
[   28.334209]      host=99.99.99.97, domain=, nis-domain=(none)
[   28.341874]      bootserver=99.99.99.1, rootserver=99.99.99.5, rootpath=
[   28.342051]      nameserver0=99.99.99.1
[   28.684402] VFS: Mounted root (nfs filesystem) on device 0:16.
[   28.708272] devtmpfs: mounted
[   28.747831] Freeing unused kernel image (initmem) memory: 2300K
[   28.757755] Run /sbin/init as init process
[   39.469486] systemd[1]: System time before build time, advancing clock.
[   41.382394] systemd[1]: systemd v246.15-1.0.riscv64.fc33 running in system mode. (+PAM +AUDIT +SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN +PCRE2 default-hierarchy=unified)
[   41.432906] systemd[1]: Detected architecture riscv64.

Welcome to Fedora 33 (Rawhide)!

[   41.587681] systemd[1]: Set hostname to <fedora-riscv>.
[   50.761418] systemd-sysv-generator[95]: SysV service '/etc/rc.d/init.d/livesys' lacks a native systemd unit file. Automatically generating a unit file for compatibility. Please update package to include a native systemd unit file, in order to make it more safe and robust.
[   51.527913] zram_generator::generator[97]: Creating dev-zram0.swap for /dev/zram0 (275MB)
[   57.964444] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-udev.service:27: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   57.993549] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-udev.service:28: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   58.053419] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-trigger.service:23: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   58.081477] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-trigger.service:24: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   58.145535] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-pivot.service:30: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   58.174372] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-pivot.service:31: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   58.447602] systemd[1]: /usr/lib/systemd/system/gssproxy.service:13: PIDFile= references a path below legacy directory /var/run/, updating /var/run/gssproxy.pid → /run/gssproxy.pid; please update the unit file accordingly.
[   59.596376] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-mount.service:22: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   59.624034] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-mount.service:23: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   59.682316] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-mount.service:22: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   59.709969] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-mount.service:23: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   59.766799] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-initqueue.service:24: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   59.795713] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-initqueue.service:25: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   59.854333] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-cmdline.service:26: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   59.882177] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-cmdline.service:27: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
[   63.677176] systemd[1]: /usr/lib/systemd/system/ip6tables.service:14: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   63.702465] systemd[1]: /usr/lib/systemd/system/ip6tables.service:15: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   63.755847] systemd[1]: /usr/lib/systemd/system/iptables.service:14: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   63.780923] systemd[1]: /usr/lib/systemd/system/iptables.service:15: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
[   64.176567] systemd[1]: Queued start job for default target Graphical Interface.
[   64.280125] systemd[1]: Created slice Slice /system/getty.
[  OK  ] Created slice Slice /system/getty.
[   64.354451] systemd[1]: Created slice Slice /system/modprobe.
[  OK  ] Created slice Slice /system/modprobe.
[   64.419976] systemd[1]: Created slice Slice /system/serial-getty.
[  OK  ] Created slice Slice /system/serial-getty.
[   64.488102] systemd[1]: Created slice Slice /system/sshd-keygen.
[  OK  ] Created slice Slice /system/sshd-keygen.
[   64.554661] systemd[1]: Created slice Slice /system/swap-create.
[  OK  ] Created slice Slice /system/swap-create.
[   64.621316] systemd[1]: Created slice User and Session Slice.
[  OK  ] Created slice User and Session Slice.
[   64.678839] systemd[1]: Started Forward Password Requests to Wall Directory Watch.
[  OK  ] Started Forward Password R…uests to Wall Directory Watch.
[   64.732402] systemd[1]: Condition check resulted in Arbitrary Executable File Formats File System Automount Point being skipped.
[   64.749679] systemd[1]: Reached target Slices.
[  OK  ] Reached target Slices.
[   64.804090] systemd[1]: Listening on Device-mapper event daemon FIFOs.
[  OK  ] Listening on Device-mapper event daemon FIFOs.
[   64.868427] systemd[1]: Listening on LVM2 poll daemon socket.
[  OK  ] Listening on LVM2 poll daemon socket.
[   64.998827] systemd[1]: Listening on Process Core Dump Socket.
[  OK  ] Listening on Process Core Dump Socket.
[   65.061733] systemd[1]: Listening on initctl Compatibility Named Pipe.
[  OK  ] Listening on initctl Compatibility Named Pipe.
[   65.762318] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
[   65.803879] systemd[1]: Listening on Journal Socket (/dev/log).
[  OK  ] Listening on Journal Socket (/dev/log).
[   65.870028] systemd[1]: Listening on Journal Socket.
[  OK  ] Listening on Journal Socket.
[   65.966580] systemd[1]: Listening on udev Control Socket.
[  OK  ] Listening on udev Control Socket.
[   66.016638] systemd[1]: Listening on udev Kernel Socket.
[  OK  ] Listening on udev Kernel Socket.
[   66.072113] systemd[1]: Listening on User Database Manager Socket.
[  OK  ] Listening on User Database Manager Socket.
[   66.265196] systemd[1]: Mounting Huge Pages File System...
         Mounting Huge Pages File System...
[   66.477098] systemd[1]: Mounting POSIX Message Queue File System...
         Mounting POSIX Message Queue File System...
[   66.717215] systemd[1]: Mounting Kernel Debug File System...
         Mounting Kernel Debug File System...
[   67.000634] systemd[1]: Mounting Kernel Trace File System...
         Mounting Kernel Trace File System...
[   67.061135] systemd[1]: Condition check resulted in Kernel Module supporting RPCSEC_GSS being skipped.
[   67.078706] systemd[1]: Condition check resulted in Create list of static device nodes for the current kernel being skipped.
[   67.246059] systemd[1]: Starting Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling...
         Starting Monitoring of LVM…meventd or progress polling...
[   67.451615] systemd[1]: Starting Load Kernel Module configfs...
         Starting Load Kernel Module configfs...
[   67.643795] systemd[1]: Starting Load Kernel Module drm...
         Starting Load Kernel Module drm...
[   67.889066] systemd[1]: Starting Load Kernel Module fuse...
         Starting Load Kernel Module fuse...
[   68.223484] systemd[1]: Starting Preprocess NFS configuration convertion...
         Starting Preprocess NFS configuration convertion...
[   68.405239] systemd[1]: Condition check resulted in Set Up Additional Binary Formats being skipped.
[   68.685006] systemd[1]: Starting Load Kernel Modules...
         Starting Load Kernel Modules...
[   68.936926] systemd[1]: Starting Remount Root and Kernel File Systems...
         Starting Remount Root and Kernel File Systems...
[   69.026562] systemd[1]: Condition check resulted in Repartition Root Disk being skipped.
[   69.217824] systemd[1]: Starting Coldplug All udev Devices...
         Starting Coldplug All udev Devices...
[   69.476888] systemd[1]: Starting Setup Virtual Console...
         Starting Setup Virtual Console...
[   69.908789] systemd[1]: Mounted Huge Pages File System.
[  OK  ] Mounted Huge Pages File System.
[   69.985603] systemd[1]: Mounted POSIX Message Queue File System.
[  OK  ] Mounted POSIX Message Queue File System.
[   70.097151] systemd[1]: Mounted Kernel Debug File System.
[  OK  ] Mounted Kernel Debug File System.
[   70.243463] systemd[1]: Mounted Kernel Trace File System.
[  OK  ] Mounted Kernel Trace File System.
[   70.407632] systemd[1]: modprobe@configfs.service: Succeeded.
[   70.493564] systemd[1]: Finished Load Kernel Module configfs.
[  OK  ] Finished Load Kernel Module configfs.
[   71.599759] systemd[1]: modprobe@drm.service: Succeeded.
[   71.667485] systemd[1]: Finished Load Kernel Module drm.
[  OK  ] Finished Load Kernel Module drm.
[   71.800153] systemd[1]: modprobe@fuse.service: Succeeded.
[   71.890649] systemd[1]: Finished Load Kernel Module fuse.
[  OK  ] Finished Load Kernel Module fuse.
[   72.030929] systemd[1]: nfs-convert.service: Succeeded.
[   72.143720] systemd[1]: Finished Preprocess NFS configuration convertion.
[  OK  ] Finished Preprocess NFS configuration convertion.
[   72.226306] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
[   72.288874] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
[   72.405669] systemd[1]: Failed to start Load Kernel Modules.
[FAILED] Failed to start Load Kernel Modules.
See 'systemctl status systemd-modules-load.service' for details.
[   72.490312] systemd[1]: systemd-modules-load.service: Consumed 1.437s CPU time.
[   72.572603] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
[   72.596232] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
[   72.813113] systemd[1]: Starting Apply Kernel Variables...
         Starting Apply Kernel Variables...
[   73.694614] systemd[1]: systemd-remount-fs.service: Main process exited, code=exited, status=1/FAILURE
[   73.736508] systemd[1]: systemd-remount-fs.service: Failed with result 'exit-code'.
[   73.781556] systemd[1]: Failed to start Remount Root and Kernel File Systems.
[FAILED] Failed to start Remount Root and Kernel File Systems.
See 'systemctl status systemd-remount-fs.service' for details.
[   73.866320] systemd[1]: systemd-remount-fs.service: Consumed 2.933s CPU time.
[   73.899554] systemd[1]: Condition check resulted in First Boot Wizard being skipped.
[   73.988234] systemd[1]: Condition check resulted in Rebuild Hardware Database being skipped.
[   74.177164] systemd[1]: Starting Load/Save Random Seed...
         Starting Load/Save Random Seed...
[   74.298545] systemd[1]: Condition check resulted in Create System Users being skipped.
[   74.658647] systemd[1]: Starting Create Static Device Nodes in /dev...
         Starting Create Static Device Nodes in /dev...
[   75.179823] systemd[1]: Finished Apply Kernel Variables.
[  OK  ] Finished Apply Kernel Variables.
Palmer Dabbelt March 31, 2023, 9:22 p.m. UTC | #5
On Fri, 31 Mar 2023 11:55:42 PDT (-0700), Conor Dooley wrote:
> On Fri, Mar 31, 2023 at 08:46:44PM +0200, Heiko Stübner wrote:
>> Hi,
>>
>> Am Freitag, 31. März 2023, 20:41:35 CEST schrieb Conor Dooley:
>> > On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
>> > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
>> > > > From: Guo Ren <guoren@linux.alibaba.com>
>> > > >
>> > > > This patch converts riscv to use the generic entry infrastructure from
>> > > > kernel/entry/*. The generic entry makes maintainers' work easier and
>> > > > codes more elegant. Here are the changes:
>> > > >
>> > > >  - More clear entry.S with handle_exception and ret_from_exception
>> > > >  - Get rid of complex custom signal implementation
>> > > >  - Move syscall procedure from assembly to C, which is much more
>> > > >    readable.
>> > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
>> > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
>> > > >  - Use the standard preemption code instead of custom
>> > >
>> > > This has unfortunately broken booting my usual NFS rootfs on both my D1
>> > > and Icicle. It's one of the Fedora images from David, I think this one:
>> > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>> > >
>> > > It gets pretty far into things, it's once systemd is operational that
>> > > things go pear shaped:
>> >
>> > Shoulda said, can share the full logs if required of course, but they're
>> > quite verbose cos systemd etc.
>>
>> I was just investigating the same thing just now. So that saves me some
>> time tracking down the culprit :-) .
>>
>> My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
>> as well as my d1 nezha with nfsroot was affected.
>>
>> Though my board is stuck in some failure loop with both the journal- as
>> well as the timesyncd service failing again and again. And I haven't
>> figured out how to get logs without a working login console yet.
>
> I'll attach the full output from a run I guess. journald fails ad
> infinitum for me too after I cut this log off.

Thanks for looking at this.  I'm not opposed to reverting the generic 
entry stuff if it's breaking things, but it's a nice cleanup so it'd be 
great to keep it if possible.  I'm going to keep picking up other 
features for now, but if folks run out of patience trying to fix the bug 
then LMK and I'll figure out how to drop the series.

>
> Cheers,
> Conor.
>
> [    0.000000] Linux version 6.3.0-rc2-gd5e0396cf8bf-dirty (conor@spud) (ClangBuiltLinux clang version 15.0.7 (/stuff/brsdk/llvm/clang 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a), ClangBuiltLinux LLD 15.0.7) #1 SMP PREEMPT @7
> [    0.000000] OF: fdt: Ignoring memory range 0x80000000 - 0x80200000
> [    0.000000] Machine model: Microchip PolarFire-SoC Icicle Kit
> [    0.000000] earlycon: ns16550a0 at MMIO32 0x0000000020000000 (options '115200n8')
> [    0.000000] printk: bootconsole [ns16550a0] enabled
> [    0.000000] efi: UEFI not found.
> [    0.000000] OF: fdt: Reserved memory: failed to reserve memory for node 'region@BFC00000': base 0x00000000bfc00000, size 4 MiB
> [    0.000000] OF: reserved mem: 0x00000000bfc00000..0x00000000bfffffff (4096 KiB) nomap non-reusable region@BFC00000
> [    0.000000] Zone ranges:
> [    0.000000]   DMA32    [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000]   Normal   empty
> [    0.000000] Movable zone start for each node
> [    0.000000] Early memory node ranges
> [    0.000000]   node   0: [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000] Initmem setup node 0 [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000] On node 0, zone DMA32: 512 pages in unavailable ranges
> [    0.000000] SBI specification v0.3 detected
> [    0.000000] SBI implementation ID=0x1 Version=0x10000
> [    0.000000] SBI TIME extension detected
> [    0.000000] SBI IPI extension detected
> [    0.000000] SBI RFENCE extension detected
> [    0.000000] SBI SRST extension detected
> [    0.000000] SBI HSM extension detected
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] riscv: base ISA extensions acdfim
> [    0.000000] riscv: ELF capabilities acdfim
> [    0.000000] percpu: Embedded 29 pages/cpu s79648 r8192 d30944 u118784
> [    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 258055
> [    0.000000] Kernel command line: root=/dev/nfs rw ip=dhcp nfsroot=99.99.99.5:/stuff/nfs_share,tcp,v3 rdinit=/usr/sbin/init rootwait=10 earlycon
> [    0.000000] Unknown kernel command line parameters "rootwait=10", will be passed to user space.
> [    0.000000] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
> [    0.000000] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
> [    0.000000] mem auto-init: stack:all(zero), heap alloc:off, heap free:off
> [    0.000000] stackdepot: allocating hash table via alloc_large_system_hash
> [    0.000000] stackdepot hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
> [    0.000000] Virtual kernel memory layout:
> [    0.000000]       fixmap : 0xffffffc6fee00000 - 0xffffffc6ff000000   (2048 kB)
> [    0.000000]       pci io : 0xffffffc6ff000000 - 0xffffffc700000000   (  16 MB)
> [    0.000000]      vmemmap : 0xffffffc700000000 - 0xffffffc800000000   (4096 MB)
> [    0.000000]      vmalloc : 0xffffffc800000000 - 0xffffffd800000000   (  64 GB)
> [    0.000000]      modules : 0xffffffff0305f000 - 0xffffffff80000000   (1999 MB)
> [    0.000000]       lowmem : 0xffffffd800000000 - 0xffffffd83fe00000   (1022 MB)
> [    0.000000]        kasan : 0xfffffff700000000 - 0xffffffff00000000   (  32 GB)
> [    0.000000]       kernel : 0xffffffff80000000 - 0xffffffffffffffff   (2047 MB)
> [    0.000000] Memory: 545616K/1046528K available (16518K kernel code, 8042K rwdata, 8192K rodata, 2303K init, 12559K bss, 500912K reserved, 0K cma-reserved)
> [    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
> [    0.000000] trace event string verifier disabled
> [    0.000000] Running RCU self tests
> [    0.000000] Running RCU synchronous self tests
> [    0.000000] rcu: Preemptible hierarchical RCU implementation.
> [    0.000000] rcu: 	RCU lockdep checking is enabled.
> [    0.000000] rcu: 	RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=4.
> [    0.000000] rcu: 	RCU debug extended QS entry/exit.
> [    0.000000] 	Trampoline variant of Tasks RCU enabled.
> [    0.000000] 	Tracing variant of Tasks RCU enabled.
> [    0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
> [    0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
> [    0.000000] Running RCU synchronous self tests
> [    0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller
> [    0.000000] riscv-intc: 64 local interrupts mapped
> [    0.000000] plic: interrupt-controller@c000000: mapped 186 interrupts with 4 handlers for 9 contexts.
> [    0.000000] rcu: srcu_init: Setting srcu_struct sizes based on contention.
> [    0.000000] riscv-timer: riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [1]
> [    0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x1d854df40, max_idle_ns: 3526361616960 ns
> [    0.000006] sched_clock: 64 bits at 1000kHz, resolution 1000ns, wraps every 2199023255500ns
> [    0.015372] Console: colour dummy device 80x25
> [    0.020991] printk: console [tty0] enabled
> [    0.026672] printk: bootconsole [ns16550a0] disabled
> [    0.000000] Linux version 6.3.0-rc2-gd5e0396cf8bf-dirty (conor@spud) (ClangBuiltLinux clang version 15.0.7 (/stuff/brsdk/llvm/clang 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a), ClangBuiltLinux LLD 15.0.7) #1 SMP PREEMPT @7
> [    0.000000] OF: fdt: Ignoring memory range 0x80000000 - 0x80200000
> [    0.000000] Machine model: Microchip PolarFire-SoC Icicle Kit
> [    0.000000] earlycon: ns16550a0 at MMIO32 0x0000000020000000 (options '115200n8')
> [    0.000000] printk: bootconsole [ns16550a0] enabled
> [    0.000000] efi: UEFI not found.
> [    0.000000] OF: fdt: Reserved memory: failed to reserve memory for node 'region@BFC00000': base 0x00000000bfc00000, size 4 MiB
> [    0.000000] OF: reserved mem: 0x00000000bfc00000..0x00000000bfffffff (4096 KiB) nomap non-reusable region@BFC00000
> [    0.000000] Zone ranges:
> [    0.000000]   DMA32    [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000]   Normal   empty
> [    0.000000] Movable zone start for each node
> [    0.000000] Early memory node ranges
> [    0.000000]   node   0: [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000] Initmem setup node 0 [mem 0x0000000080200000-0x00000000bfffffff]
> [    0.000000] On node 0, zone DMA32: 512 pages in unavailable ranges
> [    0.000000] SBI specification v0.3 detected
> [    0.000000] SBI implementation ID=0x1 Version=0x10000
> [    0.000000] SBI TIME extension detected
> [    0.000000] SBI IPI extension detected
> [    0.000000] SBI RFENCE extension detected
> [    0.000000] SBI SRST extension detected
> [    0.000000] SBI HSM extension detected
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] riscv: base ISA extensions acdfim
> [    0.000000] riscv: ELF capabilities acdfim
> [    0.000000] percpu: Embedded 29 pages/cpu s79648 r8192 d30944 u118784
> [    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 258055
> [    0.000000] Kernel command line: root=/dev/nfs rw ip=dhcp nfsroot=99.99.99.5:/stuff/nfs_share,tcp,v3 rdinit=/usr/sbin/init rootwait=10 earlycon
> [    0.000000] Unknown kernel command line parameters "rootwait=10", will be passed to user space.
> [    0.000000] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
> [    0.000000] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
> [    0.000000] mem auto-init: stack:all(zero), heap alloc:off, heap free:off
> [    0.000000] stackdepot: allocating hash table via alloc_large_system_hash
> [    0.000000] stackdepot hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
> [    0.000000] Virtual kernel memory layout:
> [    0.000000]       fixmap : 0xffffffc6fee00000 - 0xffffffc6ff000000   (2048 kB)
> [    0.000000]       pci io : 0xffffffc6ff000000 - 0xffffffc700000000   (  16 MB)
> [    0.000000]      vmemmap : 0xffffffc700000000 - 0xffffffc800000000   (4096 MB)
> [    0.000000]      vmalloc : 0xffffffc800000000 - 0xffffffd800000000   (  64 GB)
> [    0.000000]      modules : 0xffffffff0305f000 - 0xffffffff80000000   (1999 MB)
> [    0.000000]       lowmem : 0xffffffd800000000 - 0xffffffd83fe00000   (1022 MB)
> [    0.000000]        kasan : 0xfffffff700000000 - 0xffffffff00000000   (  32 GB)
> [    0.000000]       kernel : 0xffffffff80000000 - 0xffffffffffffffff   (2047 MB)
> [    0.000000] Memory: 545616K/1046528K available (16518K kernel code, 8042K rwdata, 8192K rodata, 2303K init, 12559K bss, 500912K reserved, 0K cma-reserved)
> [    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
> [    0.000000] trace event string verifier disabled
> [    0.000000] Running RCU self tests
> [    0.000000] Running RCU synchronous self tests
> [    0.000000] rcu: Preemptible hierarchical RCU implementation.
> [    0.000000] rcu: 	RCU lockdep checking is enabled.
> [    0.000000] rcu: 	RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=4.
> [    0.000000] rcu: 	RCU debug extended QS entry/exit.
> [    0.000000] 	Trampoline variant of Tasks RCU enabled.
> [    0.000000] 	Tracing variant of Tasks RCU enabled.
> [    0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
> [    0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
> [    0.000000] Running RCU synchronous self tests
> [    0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
> [    0.000000] CPU with hartid=0 is not available
> [    0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller
> [    0.000000] riscv-intc: 64 local interrupts mapped
> [    0.000000] plic: interrupt-controller@c000000: mapped 186 interrupts with 4 handlers for 9 contexts.
> [    0.000000] rcu: srcu_init: Setting srcu_struct sizes based on contention.
> [    0.000000] riscv-timer: riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [1]
> [    0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x1d854df40, max_idle_ns: 3526361616960 ns
> [    0.000006] sched_clock: 64 bits at 1000kHz, resolution 1000ns, wraps every 2199023255500ns
> [    0.015372] Console: colour dummy device 80x25
> [    0.020991] printk: console [tty0] enabled
> [    0.026672] printk: bootconsole [ns16550a0] disabled
> [    0.033749] Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
> [    0.035065] ... MAX_LOCKDEP_SUBCLASSES:  8
> [    0.035844] ... MAX_LOCK_DEPTH:          48
> [    0.036633] ... MAX_LOCKDEP_KEYS:        8192
> [    0.037658] ... CLASSHASH_SIZE:          4096
> [    0.038478] ... MAX_LOCKDEP_ENTRIES:     32768
> [    0.039307] ... MAX_LOCKDEP_CHAINS:      65536
> [    0.040135] ... CHAINHASH_SIZE:          32768
> [    0.040963]  memory used by lock dependency info: 6365 kB
> [    0.042130]  memory used for stack traces: 4224 kB
> [    0.043012]  per task-struct memory footprint: 1920 bytes
> [    0.044452] Calibrating delay loop (skipped), value calculated using timer frequency.. 2.00 BogoMIPS (lpj=4000)
> [    0.046447] pid_max: default: 32768 minimum: 301
> [    0.052917] Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
> [    0.054369] Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
> [    0.084506] Running RCU synchronous self tests
> [    0.085686] Running RCU synchronous self tests
> [    0.095405] CPU node for /cpus/cpu@0 exist but the possible cpu range is :0-3
> [    0.130236] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.133050] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.137669] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.142192] Running RCU-tasks wait API self tests
> [    0.263638] riscv: ELF compat mode unsupported
> [    0.263807] ASID allocator disabled (0 bits)
> [    0.270226] Callback from call_rcu_tasks_trace() invoked.
> [    0.272903] rcu: Hierarchical SRCU implementation.
> [    0.274289] rcu: 	Max phase no-delay instances is 1000.
> [    0.302638] EFI services will not be available.
> [    0.315637] smp: Bringing up secondary CPUs ...
> [    0.396166] smp: Brought up 1 node, 4 CPUs
> [    0.422047] devtmpfs: initialized
> [    0.490692] Callback from call_rcu_tasks() invoked.
> [    0.681118] Running RCU synchronous self tests
> [    0.682861] Running RCU synchronous self tests
> [    0.691211] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
> [    0.693981] futex hash table entries: 1024 (order: 5, 131072 bytes, linear)
> [    0.702882] pinctrl core: initialized pinctrl subsystem
> [    0.744067] NET: Registered PF_NETLINK/PF_ROUTE protocol family
> [    0.758314] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations
> [    0.761255] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations
> [    0.795383] cpuidle: using governor menu
> [    1.272427] HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages
> [    1.274292] HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page
> [    1.411012] SCSI subsystem initialized
> [    1.431885] usbcore: registered new interface driver usbfs
> [    1.435740] usbcore: registered new interface driver hub
> [    1.439351] usbcore: registered new device driver usb
> [    1.462261] FPGA manager framework
> [    1.512290] vgaarb: loaded
> [    1.519890] clocksource: Switched to clocksource riscv_clocksource
> [    2.009122] NET: Registered PF_INET protocol family
> [    2.016053] IP idents hash table entries: 16384 (order: 5, 131072 bytes, linear)
> [    2.052865] tcp_listen_portaddr_hash hash table entries: 512 (order: 3, 36864 bytes, linear)
> [    2.056616] Table-perturb hash table entries: 65536 (order: 6, 262144 bytes, linear)
> [    2.059500] TCP established hash table entries: 8192 (order: 4, 65536 bytes, linear)
> [    2.072933] TCP bind hash table entries: 8192 (order: 8, 1179648 bytes, linear)
> [    2.113517] TCP: Hash tables configured (established 8192 bind 8192)
> [    2.120319] UDP hash table entries: 512 (order: 4, 81920 bytes, linear)
> [    2.125390] UDP-Lite hash table entries: 512 (order: 4, 81920 bytes, linear)
> [    2.136344] NET: Registered PF_UNIX/PF_LOCAL protocol family
> [    2.161084] RPC: Registered named UNIX socket transport module.
> [    2.162843] RPC: Registered udp transport module.
> [    2.164402] RPC: Registered tcp transport module.
> [    2.165631] RPC: Registered tcp NFSv4.1 backchannel transport module.
> [    2.167599] PCI: CLS 0 bytes, default 64
> [    2.190148] Unpacking initramfs...
> [    2.251957] workingset: timestamp_bits=62 max_order=18 bucket_order=0
> [    2.296023] NFS: Registering the id_resolver key type
> [    2.299423] Key type id_resolver registered
> [    2.300826] Key type id_legacy registered
> [    2.303490] nfs4filelayout_init: NFSv4 File Layout Driver Registering...
> [    2.305408] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver Registering...
> [    2.314304] 9p: Installing v9fs 9p2000 file system support
> [    2.330388] NET: Registered PF_ALG protocol family
> [    2.335558] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 246)
> [    2.337681] io scheduler mq-deadline registered
> [    2.338886] io scheduler kyber registered
> [    2.341159] io scheduler bfq registered
> [   13.013052] String selftests succeeded
> [   13.014173] test_string_helpers: Running tests...
> [   13.386804] CCACHE: DataError @ 0x00000000.0807FFF8
> [   13.391444] CCACHE: DataFail @ 0x00000000.0807FFF0
> [   13.397053] CCACHE: 4 banks, 16 ways, sets/bank=512, bytes/block=64
> [   13.398602] CCACHE: Index of the largest way enabled: 11
> [   16.037879] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
> [   16.149343] 20000000.serial: ttyS0 at MMIO 0x20000000 (irq = 15, base_baud = 9375000) is a 16550A
> [   16.156089] printk: console [ttyS0] enabled
> [   17.265279] 20100000.serial: ttyS1 at MMIO 0x20100000 (irq = 16, base_baud = 9375000) is a 16550A
> [   17.327497] 20102000.serial: ttyS2 at MMIO 0x20102000 (irq = 17, base_baud = 9375000) is a 16550A
> [   17.389076] 20104000.serial: ttyS3 at MMIO 0x20104000 (irq = 18, base_baud = 9375000) is a 16550A
> [   17.426517] of_serial: probe of 20106000.serial failed with error -28
> [   17.916595] loop: module loaded
> [   17.964831] zram: Added device: zram0
> [   18.086264] microchip-corespi 20108000.spi: Registered SPI controller 0
> [   18.121095] microchip-corespi 20109000.spi: Registered SPI controller 1
> [   18.203919] spi-nor spi3.0: w25q128 (16384 Kbytes)
> [   21.169496] Freeing initrd memory: 15668K
> [   21.395452] macb 20110000.ethernet eth0: Cadence GEM rev 0x0107010c at 0x20110000 irq 23 (00:04:a3:41:d0:fd)
> [   21.412679] e1000e: Intel(R) PRO/1000 Network Driver
> [   21.419777] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
> [   21.460117] usbcore: registered new interface driver uas
> [   21.469638] usbcore: registered new interface driver usb-storage
> [   21.524504] musb-hdrc musb-hdrc.1.auto: MUSB HDRC host driver
> [   21.540183] musb-hdrc musb-hdrc.1.auto: new USB bus registered, assigned bus number 1
> [   21.610837] hub 1-0:1.0: USB hub found
> [   21.621142] hub 1-0:1.0: 1 port detected
> [   21.680372] mpfs-musb 20201000.usb: Registered MPFS MUSB driver
> [   21.702517] mousedev: PS/2 mouse device common for all mice
> [   21.724021] i2c_dev: i2c /dev entries driver
> [   21.763554] microchip-corei2c 2010a000.i2c: registered CoreI2C bus driver
> [   21.797752] microchip-corei2c 2010b000.i2c: registered CoreI2C bus driver
> [   21.852615] sdhci: Secure Digital Host Controller Interface driver
> [   21.860773] sdhci: Copyright(c) Pierre Ossman
> [   21.868727] sdhci-pltfm: SDHCI platform and OF driver helper
> [   21.895884] usbcore: registered new interface driver usbhid
> [   21.902221] usbhid: USB HID core driver
> [   21.936265] mpfs-mailbox 37020000.mailbox: Registered MPFS mailbox controller driver
> [   21.966319] riscv-pmu-sbi: SBI PMU extension is available
> [   21.973922] riscv-pmu-sbi: 15 firmware and 4 hardware counters
> [   21.981727] riscv-pmu-sbi: Perf sampling/filtering is not supported as sscof extension is not available
> [   21.996006] mmc0: SDHCI controller on 20008000.mmc [20008000.mmc] using ADMA 64-bit
> [   22.032435] NET: Registered PF_INET6 protocol family
> [   22.080829] Segment Routing with IPv6
> [   22.087604] In-situ OAM (IOAM) with IPv6
> [   22.094555] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
> [   22.132241] NET: Registered PF_PACKET protocol family
> [   22.143837] 9pnet: Installing 9P2000 support
> [   22.145445] mmc0: new HS200 MMC card at address 0001
> [   22.160350] Key type dns_resolver registered
> [   22.198172] mmcblk0: mmc0:0001 TB2916 14.6 GiB
> [   22.359675] mmcblk0boot0: mmc0:0001 TB2916 4.00 MiB
> [   22.472842] mmcblk0boot1: mmc0:0001 TB2916 4.00 MiB
> [   22.574290] mmcblk0rpmb: mmc0:0001 TB2916 4.00 MiB, chardev (242:0)
> [   23.619821] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating architecture page table helpers
> [   23.910330] mpfs-sys-controller syscontroller: Registered MPFS system controller
> [   23.955403] random: crng init done
> [   23.961736] mpfs-rng mpfs-rng: Registered MPFS hwrng
> [   24.040123] macb 20110000.ethernet eth0: PHY [20110000.ethernet-ffffffff:00] driver [RTL8211F Gigabit Ethernet] (irq=POLL)
> [   24.055924] macb 20110000.ethernet eth0: configuring for phy/sgmii link mode
> [   28.254478] macb 20110000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off
> [   28.266341] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> [   28.275892] Sending DHCP requests ., OK
> [   28.306175] IP-Config: Got DHCP answer from 99.99.99.1, my address is 99.99.99.97
> [   28.316744] IP-Config: Complete:
> [   28.321357]      device=eth0, hwaddr=00:04:a3:41:d0:fd, ipaddr=99.99.99.97, mask=255.255.255.0, gw=99.99.99.1
> [   28.334209]      host=99.99.99.97, domain=, nis-domain=(none)
> [   28.341874]      bootserver=99.99.99.1, rootserver=99.99.99.5, rootpath=
> [   28.342051]      nameserver0=99.99.99.1
> [   28.684402] VFS: Mounted root (nfs filesystem) on device 0:16.
> [   28.708272] devtmpfs: mounted
> [   28.747831] Freeing unused kernel image (initmem) memory: 2300K
> [   28.757755] Run /sbin/init as init process
> [   39.469486] systemd[1]: System time before build time, advancing clock.
> [   41.382394] systemd[1]: systemd v246.15-1.0.riscv64.fc33 running in system mode. (+PAM +AUDIT +SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN +PCRE2 default-hierarchy=unified)
> [   41.432906] systemd[1]: Detected architecture riscv64.
>
> Welcome to Fedora 33 (Rawhide)!
>
> [   41.587681] systemd[1]: Set hostname to <fedora-riscv>.
> [   50.761418] systemd-sysv-generator[95]: SysV service '/etc/rc.d/init.d/livesys' lacks a native systemd unit file. Automatically generating a unit file for compatibility. Please update package to include a native systemd unit file, in order to make it more safe and robust.
> [   51.527913] zram_generator::generator[97]: Creating dev-zram0.swap for /dev/zram0 (275MB)
> [   57.964444] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-udev.service:27: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   57.993549] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-udev.service:28: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   58.053419] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-trigger.service:23: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   58.081477] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-trigger.service:24: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   58.145535] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-pivot.service:30: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   58.174372] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-pivot.service:31: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   58.447602] systemd[1]: /usr/lib/systemd/system/gssproxy.service:13: PIDFile= references a path below legacy directory /var/run/, updating /var/run/gssproxy.pid → /run/gssproxy.pid; please update the unit file accordingly.
> [   59.596376] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-mount.service:22: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   59.624034] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-pre-mount.service:23: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   59.682316] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-mount.service:22: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   59.709969] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-mount.service:23: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   59.766799] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-initqueue.service:24: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   59.795713] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-initqueue.service:25: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   59.854333] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-cmdline.service:26: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   59.882177] systemd[1]: /usr/lib/dracut/modules.d/98dracut-systemd/dracut-cmdline.service:27: Standard output type syslog+console is obsolete, automatically updating to journal+console. Please update your unit file, and consider removing the setting altogether.
> [   63.677176] systemd[1]: /usr/lib/systemd/system/ip6tables.service:14: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   63.702465] systemd[1]: /usr/lib/systemd/system/ip6tables.service:15: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   63.755847] systemd[1]: /usr/lib/systemd/system/iptables.service:14: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   63.780923] systemd[1]: /usr/lib/systemd/system/iptables.service:15: Standard output type syslog is obsolete, automatically updating to journal. Please update your unit file, and consider removing the setting altogether.
> [   64.176567] systemd[1]: Queued start job for default target Graphical Interface.
> [   64.280125] systemd[1]: Created slice Slice /system/getty.
> [  OK  ] Created slice Slice /system/getty.
> [   64.354451] systemd[1]: Created slice Slice /system/modprobe.
> [  OK  ] Created slice Slice /system/modprobe.
> [   64.419976] systemd[1]: Created slice Slice /system/serial-getty.
> [  OK  ] Created slice Slice /system/serial-getty.
> [   64.488102] systemd[1]: Created slice Slice /system/sshd-keygen.
> [  OK  ] Created slice Slice /system/sshd-keygen.
> [   64.554661] systemd[1]: Created slice Slice /system/swap-create.
> [  OK  ] Created slice Slice /system/swap-create.
> [   64.621316] systemd[1]: Created slice User and Session Slice.
> [  OK  ] Created slice User and Session Slice.
> [   64.678839] systemd[1]: Started Forward Password Requests to Wall Directory Watch.
> [  OK  ] Started Forward Password R…uests to Wall Directory Watch.
> [   64.732402] systemd[1]: Condition check resulted in Arbitrary Executable File Formats File System Automount Point being skipped.
> [   64.749679] systemd[1]: Reached target Slices.
> [  OK  ] Reached target Slices.
> [   64.804090] systemd[1]: Listening on Device-mapper event daemon FIFOs.
> [  OK  ] Listening on Device-mapper event daemon FIFOs.
> [   64.868427] systemd[1]: Listening on LVM2 poll daemon socket.
> [  OK  ] Listening on LVM2 poll daemon socket.
> [   64.998827] systemd[1]: Listening on Process Core Dump Socket.
> [  OK  ] Listening on Process Core Dump Socket.
> [   65.061733] systemd[1]: Listening on initctl Compatibility Named Pipe.
> [  OK  ] Listening on initctl Compatibility Named Pipe.
> [   65.762318] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   65.803879] systemd[1]: Listening on Journal Socket (/dev/log).
> [  OK  ] Listening on Journal Socket (/dev/log).
> [   65.870028] systemd[1]: Listening on Journal Socket.
> [  OK  ] Listening on Journal Socket.
> [   65.966580] systemd[1]: Listening on udev Control Socket.
> [  OK  ] Listening on udev Control Socket.
> [   66.016638] systemd[1]: Listening on udev Kernel Socket.
> [  OK  ] Listening on udev Kernel Socket.
> [   66.072113] systemd[1]: Listening on User Database Manager Socket.
> [  OK  ] Listening on User Database Manager Socket.
> [   66.265196] systemd[1]: Mounting Huge Pages File System...
>          Mounting Huge Pages File System...
> [   66.477098] systemd[1]: Mounting POSIX Message Queue File System...
>          Mounting POSIX Message Queue File System...
> [   66.717215] systemd[1]: Mounting Kernel Debug File System...
>          Mounting Kernel Debug File System...
> [   67.000634] systemd[1]: Mounting Kernel Trace File System...
>          Mounting Kernel Trace File System...
> [   67.061135] systemd[1]: Condition check resulted in Kernel Module supporting RPCSEC_GSS being skipped.
> [   67.078706] systemd[1]: Condition check resulted in Create list of static device nodes for the current kernel being skipped.
> [   67.246059] systemd[1]: Starting Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling...
>          Starting Monitoring of LVM…meventd or progress polling...
> [   67.451615] systemd[1]: Starting Load Kernel Module configfs...
>          Starting Load Kernel Module configfs...
> [   67.643795] systemd[1]: Starting Load Kernel Module drm...
>          Starting Load Kernel Module drm...
> [   67.889066] systemd[1]: Starting Load Kernel Module fuse...
>          Starting Load Kernel Module fuse...
> [   68.223484] systemd[1]: Starting Preprocess NFS configuration convertion...
>          Starting Preprocess NFS configuration convertion...
> [   68.405239] systemd[1]: Condition check resulted in Set Up Additional Binary Formats being skipped.
> [   68.685006] systemd[1]: Starting Load Kernel Modules...
>          Starting Load Kernel Modules...
> [   68.936926] systemd[1]: Starting Remount Root and Kernel File Systems...
>          Starting Remount Root and Kernel File Systems...
> [   69.026562] systemd[1]: Condition check resulted in Repartition Root Disk being skipped.
> [   69.217824] systemd[1]: Starting Coldplug All udev Devices...
>          Starting Coldplug All udev Devices...
> [   69.476888] systemd[1]: Starting Setup Virtual Console...
>          Starting Setup Virtual Console...
> [   69.908789] systemd[1]: Mounted Huge Pages File System.
> [  OK  ] Mounted Huge Pages File System.
> [   69.985603] systemd[1]: Mounted POSIX Message Queue File System.
> [  OK  ] Mounted POSIX Message Queue File System.
> [   70.097151] systemd[1]: Mounted Kernel Debug File System.
> [  OK  ] Mounted Kernel Debug File System.
> [   70.243463] systemd[1]: Mounted Kernel Trace File System.
> [  OK  ] Mounted Kernel Trace File System.
> [   70.407632] systemd[1]: modprobe@configfs.service: Succeeded.
> [   70.493564] systemd[1]: Finished Load Kernel Module configfs.
> [  OK  ] Finished Load Kernel Module configfs.
> [   71.599759] systemd[1]: modprobe@drm.service: Succeeded.
> [   71.667485] systemd[1]: Finished Load Kernel Module drm.
> [  OK  ] Finished Load Kernel Module drm.
> [   71.800153] systemd[1]: modprobe@fuse.service: Succeeded.
> [   71.890649] systemd[1]: Finished Load Kernel Module fuse.
> [  OK  ] Finished Load Kernel Module fuse.
> [   72.030929] systemd[1]: nfs-convert.service: Succeeded.
> [   72.143720] systemd[1]: Finished Preprocess NFS configuration convertion.
> [  OK  ] Finished Preprocess NFS configuration convertion.
> [   72.226306] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
> [   72.288874] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
> [   72.405669] systemd[1]: Failed to start Load Kernel Modules.
> [FAILED] Failed to start Load Kernel Modules.
> See 'systemctl status systemd-modules-load.service' for details.
> [   72.490312] systemd[1]: systemd-modules-load.service: Consumed 1.437s CPU time.
> [   72.572603] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
> [   72.596232] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
> [   72.813113] systemd[1]: Starting Apply Kernel Variables...
>          Starting Apply Kernel Variables...
> [   73.694614] systemd[1]: systemd-remount-fs.service: Main process exited, code=exited, status=1/FAILURE
> [   73.736508] systemd[1]: systemd-remount-fs.service: Failed with result 'exit-code'.
> [   73.781556] systemd[1]: Failed to start Remount Root and Kernel File Systems.
> [FAILED] Failed to start Remount Root and Kernel File Systems.
> See 'systemctl status systemd-remount-fs.service' for details.
> [   73.866320] systemd[1]: systemd-remount-fs.service: Consumed 2.933s CPU time.
> [   73.899554] systemd[1]: Condition check resulted in First Boot Wizard being skipped.
> [   73.988234] systemd[1]: Condition check resulted in Rebuild Hardware Database being skipped.
> [   74.177164] systemd[1]: Starting Load/Save Random Seed...
>          Starting Load/Save Random Seed...
> [   74.298545] systemd[1]: Condition check resulted in Create System Users being skipped.
> [   74.658647] systemd[1]: Starting Create Static Device Nodes in /dev...
>          Starting Create Static Device Nodes in /dev...
> [   75.179823] systemd[1]: Finished Apply Kernel Variables.
> [  OK  ] Finished Apply Kernel Variables.
Guo Ren April 1, 2023, 1:39 a.m. UTC | #6
On Fri, Mar 31, 2023 at 2:34 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > This patch converts riscv to use the generic entry infrastructure from
> > kernel/entry/*. The generic entry makes maintainers' work easier and
> > codes more elegant. Here are the changes:
> >
> >  - More clear entry.S with handle_exception and ret_from_exception
> >  - Get rid of complex custom signal implementation
> >  - Move syscall procedure from assembly to C, which is much more
> >    readable.
> >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> >  - Use the standard preemption code instead of custom
>
> This has unfortunately broken booting my usual NFS rootfs on both my D1
> and Icicle. It's one of the Fedora images from David, I think this one:
> http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>
> It gets pretty far into things, it's once systemd is operational that
> things go pear shaped:
>
> [  OK  ] Mounted Huge Pages File System.
> [   70.297439] systemd[1]: Mounted POSIX Message Queue File System.
> [  OK  ] Mounted POSIX Message Queue File System.
> [   70.453489] systemd[1]: Mounted Kernel Debug File System.
> [  OK  ] Mounted Kernel Debug File System.
> [   70.516331] systemd[1]: Mounted Kernel Trace File System.
> [  OK  ] Mounted Kernel Trace File System.
> [   70.679253] systemd[1]: modprobe@configfs.service: Succeeded.
> [   70.788400] systemd[1]: Finished Load Kernel Module configfs.
> [  OK  ] Finished Load Kernel Module configfs.
> [   71.501222] systemd[1]: modprobe@drm.service: Succeeded.
> [   71.573295] systemd[1]: Finished Load Kernel Module drm.
> [  OK  ] Finished Load Kernel Module drm.
> [   71.825934] systemd[1]: modprobe@fuse.service: Succeeded.
> [   71.886945] systemd[1]: Finished Load Kernel Module fuse.
> [  OK  ] Finished Load Kernel Module fuse.
> [   71.991932] systemd[1]: nfs-convert.service: Succeeded.
> [   72.034674] systemd[1]: Finished Preprocess NFS configuration convertion.
> [  OK  ] Finished Preprocess NFS configuration convertion.
> [   72.148778] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
> [   72.256659] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
> [   72.337818] systemd[1]: Failed to start Load Kernel Modules.
> [FAILED] Failed to start Load Kernel Modules.
Are you sure you've compiled all kernel modules? This patch needs all
kernel stuff re-compiled.

> See 'systemctl status systemd-modules-load.service' for details.
> [   72.410491] systemd[1]: systemd-modules-load.service: Consumed 1.463s CPU time.
> [   72.496739] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
> [   72.513689] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
> [   72.682549] systemd[1]: Starting Apply Kernel Variables..
> [  OK  ] Finished Apply Kernel Variables.
> [   76.314434] systemd[1]: Finished Load/Save Random Seed.
> [  OK  ] Finished Load/Save Random Seed.
> [***   ] (1 of 6) A start job is running for…p Virtual Console (14s / no limit)
> [  OK  ] Finished Create Static Device Nodes in /dev.
> [   79.787065] systemd[1]: Started Entropy Daemon based on the HAVEGE algorithm.
> [  OK  ] Started Entropy Daemon based on the HAVEGE algorithm.
> [   80.186295] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   80.713508] systemd[1]: Starting Rule-based Manager for Device Events and Files...
>          Starting Rule-based Manage…for Device Events and Files...
> [  *** ] (2 of 7) A start job is running for… All udev Devices (17s / no limit)
> [   82.939347] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   83.032046] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   83.210041] systemd[1]: Dependency failed for Flush Journal to Persistent Storage.
> [DEPEND] Dependency failed for Flus…Journal to Persistent Storage.
> [   83.254122] systemd[1]: systemd-journal-flush.service: Job systemd-journal-flush.service/start failed with result 'dependency'.
> [   83.272366] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> [   83.334360] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1.
> [   83.427839] systemd[1]: Finished Setup Virtual Console.
> [  OK  ] Finished Setup Virtual Console.
> [   83.510650] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   83.554417] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> [   83.576573] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   83.904878] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   85.752090] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   85.826421] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [   85.876165] systemd[1]: Failed to start Journal Service.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   85.952221] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> [   86.002092] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 2.
> [   86.015081] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   86.076429] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> [   86.089700] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   86.390162] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
> [   87.904427] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> [   87.950259] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> [   88.000661] systemd[1]: Failed to start Journal Service.
> [FAILED] Failed to start Journal Service.
> See 'systemctl status systemd-journald.service' for details.
> [   88.079953] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> [   88.128956] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 3.
> [   88.145365] systemd[1]: Stopped Journal Service.
> [  OK  ] Stopped Journal Service.
> [   88.189975] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> [   88.205799] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> [   88.514817] systemd[1]: Starting Journal Service...
>          Starting Journal Service...
>
> (Note, you need to merge -rc2 into riscv/for-next to actually boot)
>
> Cheers,
> Conor.
Guo Ren April 1, 2023, 2:15 a.m. UTC | #7
On Fri, Mar 31, 2023 at 2:47 PM Heiko Stübner <heiko@sntech.de> wrote:
>
> Hi,
>
> Am Freitag, 31. März 2023, 20:41:35 CEST schrieb Conor Dooley:
> > On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
> > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > >
> > > > This patch converts riscv to use the generic entry infrastructure from
> > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > codes more elegant. Here are the changes:
> > > >
> > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > >  - Get rid of complex custom signal implementation
> > > >  - Move syscall procedure from assembly to C, which is much more
> > > >    readable.
> > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > >  - Use the standard preemption code instead of custom
> > >
> > > This has unfortunately broken booting my usual NFS rootfs on both my D1
> > > and Icicle. It's one of the Fedora images from David, I think this one:
> > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> > >
> > > It gets pretty far into things, it's once systemd is operational that
> > > things go pear shaped:
> >
> > Shoulda said, can share the full logs if required of course, but they're
> > quite verbose cos systemd etc.
>
> I was just investigating the same thing just now. So that saves me some
> tracking down the culprit :-) .
>
> My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
> as well as my d1 nezha with nfsroot was affected.
Can you reproduce it with qemu? Could you give me some tips so that I
can reproduce it on qemu?

>
> Though my board is stuck in some failure loop with both the journal- as
> well as the timesyncd service failing again and again. And I haven't
> figured out how to get logs without a working login console yet.
>
>
> Heiko
>
>
> >
> > >
> > > [  OK  ] Mounted Huge Pages File System.
> > > [   70.297439] systemd[1]: Mounted POSIX Message Queue File System.
> > > [  OK  ] Mounted POSIX Message Queue File System.
> > > [   70.453489] systemd[1]: Mounted Kernel Debug File System.
> > > [  OK  ] Mounted Kernel Debug File System.
> > > [   70.516331] systemd[1]: Mounted Kernel Trace File System.
> > > [  OK  ] Mounted Kernel Trace File System.
> > > [   70.679253] systemd[1]: modprobe@configfs.service: Succeeded.
> > > [   70.788400] systemd[1]: Finished Load Kernel Module configfs.
> > > [  OK  ] Finished Load Kernel Module configfs.
> > > [   71.501222] systemd[1]: modprobe@drm.service: Succeeded.
> > > [   71.573295] systemd[1]: Finished Load Kernel Module drm.
> > > [  OK  ] Finished Load Kernel Module drm.
> > > [   71.825934] systemd[1]: modprobe@fuse.service: Succeeded.
> > > [   71.886945] systemd[1]: Finished Load Kernel Module fuse.
> > > [  OK  ] Finished Load Kernel Module fuse.
> > > [   71.991932] systemd[1]: nfs-convert.service: Succeeded.
> > > [   72.034674] systemd[1]: Finished Preprocess NFS configuration convertion.
> > > [  OK  ] Finished Preprocess NFS configuration convertion.
> > > [   72.148778] systemd[1]: systemd-modules-load.service: Main process exited, code=exited, status=1/FAILURE
> > > [   72.256659] systemd[1]: systemd-modules-load.service: Failed with result 'exit-code'.
> > > [   72.337818] systemd[1]: Failed to start Load Kernel Modules.
> > > [FAILED] Failed to start Load Kernel Modules.
> > > See 'systemctl status systemd-modules-load.service' for details.
> > > [   72.410491] systemd[1]: systemd-modules-load.service: Consumed 1.463s CPU time.
> > > [   72.496739] systemd[1]: Condition check resulted in FUSE Control File System being skipped.
> > > [   72.513689] systemd[1]: Condition check resulted in Kernel Configuration File System being skipped.
> > > [   72.682549] systemd[1]: Starting Apply Kernel Variables..
> > > [  OK  ] Finished Apply Kernel Variables.
> > > [   76.314434] systemd[1]: Finished Load/Save Random Seed.
> > > [  OK  ] Finished Load/Save Random Seed.
> > > [***   ] (1 of 6) A start job is running for…p Virtual Console (14s / no limit)
> > > [  OK  ] Finished Create Static Device Nodes in /dev.
> > > [   79.787065] systemd[1]: Started Entropy Daemon based on the HAVEGE algorithm.
> > > [  OK  ] Started Entropy Daemon based on the HAVEGE algorithm.
> > > [   80.186295] systemd[1]: Starting Journal Service...
> > >          Starting Journal Service...
> > > [   80.713508] systemd[1]: Starting Rule-based Manager for Device Events and Files...
> > >          Starting Rule-based Manage…for Device Events and Files...
> > > [  *** ] (2 of 7) A start job is running for… All udev Devices (17s / no limit)
> > > [   82.939347] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > > [   83.032046] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > > [FAILED] Failed to start Journal Service.
> > > See 'systemctl status systemd-journald.service' for details.
> > > [   83.210041] systemd[1]: Dependency failed for Flush Journal to Persistent Storage.
> > > [DEPEND] Dependency failed for Flus…Journal to Persistent Storage.
> > > [   83.254122] systemd[1]: systemd-journal-flush.service: Job systemd-journal-flush.service/start failed with result 'dependency'.
> > > [   83.272366] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> > > [   83.334360] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 1.
> > > [   83.427839] systemd[1]: Finished Setup Virtual Console.
> > > [  OK  ] Finished Setup Virtual Console.
> > > [   83.510650] systemd[1]: Stopped Journal Service.
> > > [  OK  ] Stopped Journal Service.
> > > [   83.554417] systemd[1]: systemd-journald.service: Consumed 1.443s CPU time.
> > > [   83.576573] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > > [   83.904878] systemd[1]: Starting Journal Service...
> > >          Starting Journal Service...
> > > [   85.752090] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > > [   85.826421] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > > [   85.876165] systemd[1]: Failed to start Journal Service.
> > > [FAILED] Failed to start Journal Service.
> > > See 'systemctl status systemd-journald.service' for details.
> > > [   85.952221] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> > > [   86.002092] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 2.
> > > [   86.015081] systemd[1]: Stopped Journal Service.
> > > [  OK  ] Stopped Journal Service.
> > > [   86.076429] systemd[1]: systemd-journald.service: Consumed 1.355s CPU time.
> > > [   86.089700] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > > [   86.390162] systemd[1]: Starting Journal Service...
> > >          Starting Journal Service...
> > > [   87.904427] systemd[1]: systemd-journald.service: Main process exited, code=exited, status=1/FAILURE
> > > [   87.950259] systemd[1]: systemd-journald.service: Failed with result 'exit-code'.
> > > [   88.000661] systemd[1]: Failed to start Journal Service.
> > > [FAILED] Failed to start Journal Service.
> > > See 'systemctl status systemd-journald.service' for details.
> > > [   88.079953] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> > > [   88.128956] systemd[1]: systemd-journald.service: Scheduled restart job, restart counter is at 3.
> > > [   88.145365] systemd[1]: Stopped Journal Service.
> > > [  OK  ] Stopped Journal Service.
> > > [   88.189975] systemd[1]: systemd-journald.service: Consumed 1.316s CPU time.
> > > [   88.205799] systemd[1]: Condition check resulted in Journal Audit Socket being skipped.
> > > [   88.514817] systemd[1]: Starting Journal Service...
> > >          Starting Journal Service...
> > >
> > > (Note, you need to merge -rc2 into riscv/for-next to actually boot)
> > >
> > > Cheers,
> > > Conor.
> >
> >
> >
>
>
>
>
Conor Dooley April 1, 2023, 9:32 a.m. UTC | #8
On Fri, Mar 31, 2023 at 09:39:32PM -0400, Guo Ren wrote:
> On Fri, Mar 31, 2023 at 2:34 PM Conor Dooley <conor@kernel.org> wrote:

> > [   72.337818] systemd[1]: Failed to start Load Kernel Modules.
> > [FAILED] Failed to start Load Kernel Modules.

> Are you sure, you've compiled all kernel modules? This patch needs all
> kernel stuff re-compiled.

It does this with CONFIG_MODULES=n kernels too FWIW.
Heiko Stübner April 1, 2023, 12:10 p.m. UTC | #9
Hi Guo,

Am Samstag, 1. April 2023, 04:15:32 CEST schrieb Guo Ren:
> On Fri, Mar 31, 2023 at 2:47 PM Heiko Stübner <heiko@sntech.de> wrote:
> > Am Freitag, 31. März 2023, 20:41:35 CEST schrieb Conor Dooley:
> > > On Fri, Mar 31, 2023 at 07:34:38PM +0100, Conor Dooley wrote:
> > > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > >
> > > > > This patch converts riscv to use the generic entry infrastructure from
> > > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > > codes more elegant. Here are the changes:
> > > > >
> > > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > > >  - Get rid of complex custom signal implementation
> > > > >  - Move syscall procedure from assembly to C, which is much more
> > > > >    readable.
> > > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > > >  - Use the standard preemption code instead of custom
> > > >
> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
> > > > and Icicle. It's one of the Fedora images from David, I think this one:
> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> > > >
> > > > It gets pretty far into things, it's once systemd is operational that
> > > > things go pear shaped:
> > >
> > > Shoulda said, can share the full logs if required of course, but they're
> > > quite verbose cos systemd etc.
> >
> > I was just investigating the same thing just now. So that saves me some
> > tracking down the culprit :-) .
> >
> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
> > as well as my d1 nezha with nfsroot was affected.
> Can you reproduce it with qemu? Could give me some tips and let me
> reproduce it on qemu?

As written, the issue happens on both qemu-virt and the d1-nezha board.
Below I've summarized my setup a bit:


(1) Qemu-commandline:
---------------------

/usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
  -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
  -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
  -append "earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw" \
  -netdev user,id=n1 -device virtio-net-pci,netdev=n1

This command boots using an nfs-root served by an nfs-server running
on the same host as qemu.

Though the issue does not seem to be related to the nfs. I also tried
starting with a local disk image like [0] and the issue with the journald
still persists.


(2) the rootfs-contents:
------------------------

Conor seems to be using Fedora, while my distribution of choice is Debian.
My rootfs was created following the instructions on the Debian wiki for
the debports with debootstrap [1].


This morning I also re-created a completely new and pristine rootfs using
those instructions and the issue appeared immediately on first-boot.


Hope this helps a bit
Heiko


[0] same result with a disk-image ... journald failing
/usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
  -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
  -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
  -append 'root=/dev/vda console=ttyS0' \
  -drive file=/home/devel/nfs/rootfs-riscv64virt.ext4,format=raw,id=hd0 \
  -device virtio-blk-pci,drive=hd0

[1] https://wiki.debian.org/RISC-V#debootstrap
Björn Töpel April 1, 2023, 1:19 p.m. UTC | #10
>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>> > > >
>> > > > It gets pretty far into things, it's once systemd is operational that
>> > > > things go pear shaped:
>> > >
>> > > Shoulda said, can share the full logs if required of course, but they're
>> > > quite verbose cos systemd etc.
>> >
>> > I was just investigating the same thing just now. So that saves me some
>> > tracking down the culprit :-) .
>> >
>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
>> > as well as my d1 nezha with nfsroot was affected.
>> Can you reproduce it with qemu? Could give me some tips and let me
>> reproduce it on qemu?

FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.

| $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com

and the rootfs qemu config:
 |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
 |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
 |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"
Björn Töpel April 1, 2023, 1:33 p.m. UTC | #11
(Narrowing the Cc list.)

Björn Töpel <bjorn@kernel.org> writes:

>>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
>>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
>>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>>> > > >
>>> > > > It gets pretty far into things, it's once systemd is operational that
>>> > > > things go pear shaped:
>>> > >
>>> > > Shoulda said, can share the full logs if required of course, but they're
>>> > > quite verbose cos systemd etc.
>>> >
>>> > I was just investigating the same thing just now. So that saves me some
>>> > tracking down the culprit :-) .
>>> >
>>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
>>> > as well as my d1 nezha with nfsroot was affected.
>>> Can you reproduce it with qemu? Could give me some tips and let me
>>> reproduce it on qemu?
>
> FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.
>
> | $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com
>
> and the rootfs qemu config:
>  |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
>  |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
>  |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"

I took a couple of different versions of the series for a spin, and for
me it stops working between these versions.

$ git fetch https://github.com/guoren83/linux.git generic_entry_v12:ge-v12
$ git fetch https://github.com/guoren83/linux.git generic_entry_v13:g2-v13

v12 is happy with systemd, v13 is not.
Björn Töpel April 1, 2023, 2:58 p.m. UTC | #12
Björn Töpel <bjorn@kernel.org> writes:

> (Narrowing the Cc list.)
>
> Björn Töpel <bjorn@kernel.org> writes:
>
>>>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
>>>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
>>>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>>>> > > >
>>>> > > > It gets pretty far into things, it's once systemd is operational that
>>>> > > > things go pear shaped:
>>>> > >
>>>> > > Shoulda said, can share the full logs if required of course, but they're
>>>> > > quite verbose cos systemd etc.
>>>> >
>>>> > I was just investigating the same thing just now. So that saves me some
>>>> > tracking down the culprit :-) .
>>>> >
>>>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
>>>> > as well as my d1 nezha with nfsroot was affected.
>>>> Can you reproduce it with qemu? Could give me some tips and let me
>>>> reproduce it on qemu?
>>
>> FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.
>>
>> | $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com
>>
>> and the rootfs qemu config:
>>  |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
>>  |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
>>  |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"
>
> I took a couple of different versions of the series for a spin, and for
> me it stops working between these versions.
>
> $ git fetch https://github.com/guoren83/linux.git generic_entry_v12:ge-v12
> $ git fetch https://github.com/guoren83/linux.git generic_entry_v13:g2-v13
>
> v12 is happy with systemd, v13 is not.

Hmm, this patch seems to resolve the issue for me:

--8<--
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index       1f4e37be7eb3..8c258b78c925 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -270,11 +270,11 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
        if (user_mode(regs)) {
                ulong syscall = regs->a7;
 
-               syscall = syscall_enter_from_user_mode(regs, syscall);
-
                regs->epc += 4;
                regs->orig_a0 = regs->a0;
 
+               syscall = syscall_enter_from_user_mode(regs, syscall);
+
                if (syscall < NR_syscalls)
                        syscall_handler(regs, syscall);
                else
-->8--

Seems like syscall_enter_from_user_mode() clobbers regs->a0.


Björn
Heiko Stübner April 1, 2023, 3:42 p.m. UTC | #13
Am Samstag, 1. April 2023, 16:58:01 CEST schrieb Björn Töpel:
> Björn Töpel <bjorn@kernel.org> writes:
> 
> > (Narrowing the Cc list.)
> >
> > Björn Töpel <bjorn@kernel.org> writes:
> >
> >>>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
> >>>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
> >>>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> >>>> > > >
> >>>> > > > It gets pretty far into things, it's once systemd is operational that
> >>>> > > > things go pear shaped:
> >>>> > >
> >>>> > > Shoulda said, can share the full logs if required of course, but they're
> >>>> > > quite verbose cos systemd etc.
> >>>> >
> >>>> > I was just investigating the same thing just now. So that saves me some
> >>>> > tracking down the culprit :-) .
> >>>> >
> >>>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
> >>>> > as well as my d1 nezha with nfsroot was affected.
> >>>> Can you reproduce it with qemu? Could give me some tips and let me
> >>>> reproduce it on qemu?
> >>
> >> FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.
> >>
> >> | $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com
> >>
> >> and the rootfs qemu config:
> >>  |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
> >>  |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
> >>  |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"
> >
> > I took a couple of different versions of the series for a spin, and for
> > me it stops working between these versions.
> >
> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v12:ge-v12
> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v13:g2-v13
> >
> > v12 is happy with systemd, v13 is not.
> 
> Hmm, this patch seems to resolve the issue for me:
> 
> --8<--
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index       1f4e37be7eb3..8c258b78c925 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -270,11 +270,11 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
>         if (user_mode(regs)) {
>                 ulong syscall = regs->a7;
>  
> -               syscall = syscall_enter_from_user_mode(regs, syscall);
> -
>                 regs->epc += 4;
>                 regs->orig_a0 = regs->a0;
>  
> +               syscall = syscall_enter_from_user_mode(regs, syscall);
> +
>                 if (syscall < NR_syscalls)
>                         syscall_handler(regs, syscall);
>                 else
> -->8--

Woohoo, thanks for tracking that down. This change makes both my
Qemu and D1-Nezha boot again.

> Seems like syscall_enter_from_user_mode() clobber regs->a0.

With a0 also being the return value in the calling conventions (and being
the syscall value here), I guess that could be somehow expected and thus
as your patch does, the a0 value should be saved first I guess?

And it looks like LoongArch also saves a0 before calling
syscall_enter_from_user_mode() [0]

Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu>

Heiko


[0] https://elixir.bootlin.com/linux/latest/source/arch/loongarch/kernel/syscall.c#L54
Björn Töpel April 1, 2023, 6:41 p.m. UTC | #14
Heiko Stübner <heiko@sntech.de> writes:

> Am Samstag, 1. April 2023, 16:58:01 CEST schrieb Björn Töpel:
>> Björn Töpel <bjorn@kernel.org> writes:
>> 
>> > (Narrowing the Cc list.)
>> >
>> > Björn Töpel <bjorn@kernel.org> writes:
>> >
>> >>>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
>> >>>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
>> >>>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
>> >>>> > > >
>> >>>> > > > It gets pretty far into things, it's once systemd is operational that
>> >>>> > > > things go pear shaped:
>> >>>> > >
>> >>>> > > Shoulda said, can share the full logs if required of course, but they're
>> >>>> > > quite verbose cos systemd etc.
>> >>>> >
>> >>>> > I was just investigating the same thing just now. So that saves me some
>> >>>> > tracking down the culprit :-) .
>> >>>> >
>> >>>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
>> >>>> > as well as my d1 nezha with nfsroot was affected.
>> >>>> Can you reproduce it with qemu? Could give me some tips and let me
>> >>>> reproduce it on qemu?
>> >>
>> >> FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.
>> >>
>> >> | $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com
>> >>
>> >> and the rootfs qemu config:
>> >>  |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
>> >>  |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
>> >>  |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"
>> >
>> > I took a couple of different versions of the series for a spin, and for
>> > me it stops working between these versions.
>> >
>> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v12:ge-v12
>> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v13:g2-v13
>> >
>> > v12 is happy with systemd, v13 is not.
>> 
>> Hmm, this patch seems to resolve the issue for me:
>> 
>> --8<--
>> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
>> index       1f4e37be7eb3..8c258b78c925 100644
>> --- a/arch/riscv/kernel/traps.c
>> +++ b/arch/riscv/kernel/traps.c
>> @@ -270,11 +270,11 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
>>         if (user_mode(regs)) {
>>                 ulong syscall = regs->a7;
>>  
>> -               syscall = syscall_enter_from_user_mode(regs, syscall);
>> -
>>                 regs->epc += 4;
>>                 regs->orig_a0 = regs->a0;
>>  
>> +               syscall = syscall_enter_from_user_mode(regs, syscall);
>> +
>>                 if (syscall < NR_syscalls)
>>                         syscall_handler(regs, syscall);
>>                 else
>> -->8--
>
> Woohoo, thanks for tracking that down. This change makes both my
> Qemu and D1-Nezha boot again.
>
>> Seems like syscall_enter_from_user_mode() clobber regs->a0.
>
> With a0 also being the return value in the calling conventions (and being
> the syscall value here), I guess that could be somehow expected and thus
> as your patch does, the a0 value should be saved first I guess?
>
> And it looks like LoongArch also does save a0 before calling
> syscall_enter_from_user_mode() [0]
>
> Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu>

Cool, and thanks for testing/verifying! I'll cook a proper patch on
Monday.


Björn
Guo Ren April 7, 2023, 9:13 a.m. UTC | #15
On Sun, Apr 2, 2023 at 2:41 AM Björn Töpel <bjorn@kernel.org> wrote:
>
> Heiko Stübner <heiko@sntech.de> writes:
>
> > Am Samstag, 1. April 2023, 16:58:01 CEST schrieb Björn Töpel:
> >> Björn Töpel <bjorn@kernel.org> writes:
> >>
> >> > (Narrowing the Cc list.)
> >> >
> >> > Björn Töpel <bjorn@kernel.org> writes:
> >> >
> >> >>>> > > > This has unfortunately broken booting my usual NFS rootfs on both my D1
> >> >>>> > > > and Icicle. It's one of the Fedora images from David, I think this one:
> >> >>>> > > > http://fedora.riscv.rocks/kojifiles/work/tasks/3933/1313933/
> >> >>>> > > >
> >> >>>> > > > It gets pretty far into things, it's once systemd is operational that
> >> >>>> > > > things go pear shaped:
> >> >>>> > >
> >> >>>> > > Shoulda said, can share the full logs if required of course, but they're
> >> >>>> > > quite verbose cos systemd etc.
> >> >>>> >
> >> >>>> > I was just investigating the same thing just now. So that saves me some
> >> >>>> > tracking down the culprit :-) .
> >> >>>> >
> >> >>>> > My main qemu is living as a "board" in my boardfarm (also doing nfsroot)
> >> >>>> > as well as my d1 nezha with nfsroot was affected.
> >> >>>> Can you reproduce it with qemu? Could give me some tips and let me
> >> >>>> reproduce it on qemu?
> >> >>
> >> >> FWIW, I'm getting the systemd issue w/o NFS, on a regular 9p virtfs.
> >> >>
> >> >> | $ sudo mmdebstrap --architecture=riscv64 lunar rv-rootfs http://ports.ubuntu.com
> >> >>
> >> >> and the rootfs qemu config:
> >> >>  |  -fsdev local,id=root,path=/path/to/rv-rootfs/,security_model=none \
> >> >>  |  -device virtio-9p-pci,fsdev=root,mount_tag=/dev/root \
> >> >>  |  -append "root=/dev/root rw rootfstype=9p rootflags=version=9p2000.L,trans=virtio,cache=mmap,access=any security=none earlycon console=tty0 console=ttyS0"
> >> >
> >> > I took a couple of different versions of the series for a spin, and for
> >> > me it stops working between these versions.
> >> >
> >> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v12:ge-v12
> >> > $ git fetch https://github.com/guoren83/linux.git generic_entry_v13:g2-v13
> >> >
> >> > v12 is happy with systemd, v13 is not.
> >>
> >> Hmm, this patch seems to resolve the issue for me:
> >>
> >> --8<--
> >> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> >> index       1f4e37be7eb3..8c258b78c925 100644
> >> --- a/arch/riscv/kernel/traps.c
> >> +++ b/arch/riscv/kernel/traps.c
> >> @@ -270,11 +270,11 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
> >>         if (user_mode(regs)) {
> >>                 ulong syscall = regs->a7;
> >>
> >> -               syscall = syscall_enter_from_user_mode(regs, syscall);
> >> -
> >>                 regs->epc += 4;
> >>                 regs->orig_a0 = regs->a0;
> >>
> >> +               syscall = syscall_enter_from_user_mode(regs, syscall);
> >> +
> >>                 if (syscall < NR_syscalls)
> >>                         syscall_handler(regs, syscall);
> >>                 else
> >> -->8--
> >
> > Woohoo, thanks for tracking that down. This change makes both my
> > Qemu and D1-Nezha boot again.
> >
> >> Seems like syscall_enter_from_user_mode() clobbers regs->a0.
> >
> > With a0 also being the return value in the calling conventions (and being
> > the syscall value here), I guess that could be somehow expected and thus
> > as your patch does, the a0 value should be saved first I guess?
> >
> > And it looks like LoongArch also does save a0 before calling
> > syscall_enter_from_user_mode() [0]
> >
> > Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
>
> Cool, and thanks for testing/verifying! I'll cook a proper patch on
> Monday.
Just ping gently :) Have you done it?

>
>
> Björn
>
>
Conor Dooley April 7, 2023, 9:18 a.m. UTC | #16
On 07/04/2023 10:13, Guo Ren wrote:
>> On Sun, Apr 2, 2023 at 2:41 AM Björn Töpel <bjorn@kernel.org> wrote:

>> Cool, and thanks for testing/verifying! I'll cook a proper patch on
>> Monday.

> Just ping gently :) Have you done it?

Yup, he did!

https://patchwork.kernel.org/project/linux-riscv/patch/20230403065207.1070974-1-bjorn@kernel.org/
Guo Ren April 8, 2023, 4:59 a.m. UTC | #17
On Fri, Apr 7, 2023 at 5:18 AM <Conor.Dooley@microchip.com> wrote:
>
> On 07/04/2023 10:13, Guo Ren wrote:
> >> On Sun, Apr 2, 2023 at 2:41 AM Björn Töpel <bjorn@kernel.org> wrote:
>
> >> Cool, and thanks for testing/verifying! I'll cook a proper patch on
> >> Monday.
>
> > Just ping gently :) Have you done it?
>
> Yup, he did!
>
> https://patchwork.kernel.org/project/linux-riscv/patch/20230403065207.1070974-1-bjorn@kernel.org/
>
Great, thx.
Daniel Thompson June 29, 2023, 2:02 p.m. UTC | #18
On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> This patch converts riscv to use the generic entry infrastructure from
> kernel/entry/*. The generic entry makes maintainers' work easier and
> codes more elegant. Here are the changes:
>
>  - More clear entry.S with handle_exception and ret_from_exception
>  - Get rid of complex custom signal implementation
>  - Move syscall procedure from assembly to C, which is much more
>    readable.
>  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
>  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
>  - Use the standard preemption code instead of custom
>
> Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> Tested-by: Jisheng Zhang <jszhang@kernel.org>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Cc: Ben Hutchings <ben@decadent.org.uk>

Apologies for the late feedback but I've been swamped lately and only
recently got round to running the full kgdb test suite on the v6.4
series.

The kgdb test suite includes a couple of tests that verify that the
system resumes after breakpointing due to a BUG():
https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45

These tests have regressed on riscv between v6.3 and v6.4 and a bisect
is pointing at this patch. With these changes in place then, after kdb
resumes the system, the BUG() message is printed as normal but then
immediately fails. From the backtrace it looks like the new entry/exit
code cannot advance past a compiled breakpoint instruction:
~~~
PANIC: Fatal exception in interrupt

Entering kdb (current=0xff60000001a2a280, pid 104) on processor 1 due to NonMaskable Interrupt @ 0xffffffff800bb3c4
[1]kdb> bt
Stack traceback for pid 104
0xff60000001a2a280      104       92  1    1   R  0xff60000001a2ac50
*echo
CPU: 1 PID: 104 Comm: echo Tainted: G      D
6.3.0-rc1-00003-gf0bddf50586d #119
Hardware name: riscv-virtio,qemu (DT)
Call Trace:
[<ffffffff800050dc>] dump_backtrace+0x1c/0x24
[<ffffffff808458f8>] show_stack+0x2c/0x38
[<ffffffff80851b00>] dump_stack_lvl+0x3c/0x54
[<ffffffff80851b2c>] dump_stack+0x14/0x1c
[<ffffffff800bc4b8>] kdb_dump_stack_on_cpu+0x64/0x66
[<ffffffff800c3d2a>] kdb_show_stack+0x82/0x88
[<ffffffff800c3dc0>] kdb_bt1+0x90/0xf2
[<ffffffff800c4206>] kdb_bt+0x34c/0x384
[<ffffffff800c1d28>] kdb_parse+0x27a/0x618
[<ffffffff800c2566>] kdb_main_loop+0x3b2/0x8fa
[<ffffffff800c4c5a>] kdb_stub+0x1ba/0x3a8
[<ffffffff800bbba8>] kgdb_cpu_enter+0x342/0x5ba
[<ffffffff800bc3da>] kgdb_handle_exception+0xe0/0x11a
[<ffffffff8000810c>] kgdb_riscv_notify+0x86/0xb4
[<ffffffff8002f210>] notify_die+0x6a/0xa6
[<ffffffff80004db0>] handle_break+0x70/0xe0
[<ffffffff80852462>] do_trap_break+0x48/0x5c
[<ffffffff80003598>] ret_from_exception+0x0/0x64
[<ffffffff800bb3c4>] kgdb_compiled_break+0x0/0x14
~~~


Daniel.
Guo Ren June 30, 2023, 11:16 a.m. UTC | #19
On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>
> On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > This patch converts riscv to use the generic entry infrastructure from
> > kernel/entry/*. The generic entry makes maintainers' work easier and
> > codes more elegant. Here are the changes:
> >
> >  - More clear entry.S with handle_exception and ret_from_exception
> >  - Get rid of complex custom signal implementation
> >  - Move syscall procedure from assembly to C, which is much more
> >    readable.
> >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> >  - Use the standard preemption code instead of custom
> >
> > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Cc: Ben Hutchings <ben@decadent.org.uk>
>
> Apologies for the late feedback but I've been swamped lately and only
> recently got round to running the full kgdb test suite on the v6.4
> series.
>
> The kgdb test suite includes a couple of tests that verify that the
> system resumes after breakpointing due to a BUG():
> https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
>
> These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> is pointing at this patch. With these changes in place then, after kdb
> resumes the system, the BUG() message is printed as normal but then
> immediately fails. From the backtrace it looks like the new entry/exit
> code cannot advance past a compiled breakpoint instruction:
> ~~~
> PANIC: Fatal exception in interrupt
It comes from:
void die(struct pt_regs *regs, ...
{
...
if (in_interrupt())
        panic("Fatal exception in interrupt");
...

We could add a dump_backtrace to see what happened:
if (in_interrupt()) {
+      dump_backtrace(regs, NULL, KERN_DEFAULT);
        panic("Fatal exception in interrupt");
}



>
> Entering kdb (current=0xff60000001a2a280, pid 104) on processor 1 due to NonMaskable Interrupt @ 0xffffffff800bb3c4
> [1]kdb> bt
> Stack traceback for pid 104
> 0xff60000001a2a280      104       92  1    1   R  0xff60000001a2ac50
> *echo
> CPU: 1 PID: 104 Comm: echo Tainted: G      D
> 6.3.0-rc1-00003-gf0bddf50586d #119
> Hardware name: riscv-virtio,qemu (DT)
> Call Trace:
> [<ffffffff800050dc>] dump_backtrace+0x1c/0x24
> [<ffffffff808458f8>] show_stack+0x2c/0x38
> [<ffffffff80851b00>] dump_stack_lvl+0x3c/0x54
> [<ffffffff80851b2c>] dump_stack+0x14/0x1c
> [<ffffffff800bc4b8>] kdb_dump_stack_on_cpu+0x64/0x66
> [<ffffffff800c3d2a>] kdb_show_stack+0x82/0x88
> [<ffffffff800c3dc0>] kdb_bt1+0x90/0xf2
> [<ffffffff800c4206>] kdb_bt+0x34c/0x384
> [<ffffffff800c1d28>] kdb_parse+0x27a/0x618
> [<ffffffff800c2566>] kdb_main_loop+0x3b2/0x8fa
> [<ffffffff800c4c5a>] kdb_stub+0x1ba/0x3a8
> [<ffffffff800bbba8>] kgdb_cpu_enter+0x342/0x5ba
> [<ffffffff800bc3da>] kgdb_handle_exception+0xe0/0x11a
> [<ffffffff8000810c>] kgdb_riscv_notify+0x86/0xb4
> [<ffffffff8002f210>] notify_die+0x6a/0xa6
> [<ffffffff80004db0>] handle_break+0x70/0xe0
> [<ffffffff80852462>] do_trap_break+0x48/0x5c
> [<ffffffff80003598>] ret_from_exception+0x0/0x64
> [<ffffffff800bb3c4>] kgdb_compiled_break+0x0/0x14
> ~~~
>
>
> Daniel.
Guo Ren June 30, 2023, 11:22 a.m. UTC | #20
On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> <daniel.thompson@linaro.org> wrote:
> >
> > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > From: Guo Ren <guoren@linux.alibaba.com>
> > >
> > > This patch converts riscv to use the generic entry infrastructure from
> > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > codes more elegant. Here are the changes:
> > >
> > >  - More clear entry.S with handle_exception and ret_from_exception
> > >  - Get rid of complex custom signal implementation
> > >  - Move syscall procedure from assembly to C, which is much more
> > >    readable.
> > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > >  - Use the standard preemption code instead of custom
> > >
> > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > Cc: Ben Hutchings <ben@decadent.org.uk>
> >
> > Apologies for the late feedback but I've been swamped lately and only
> > recently got round to running the full kgdb test suite on the v6.4
> > series.
> >
> > The kgdb test suite includes a couple of tests that verify that the
> > system resumes after breakpointing due to a BUG():
> > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> >
> > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > is pointing at this patch. With these changes in place then, after kdb
> > resumes the system, the BUG() message is printed as normal but then
> > immediately fails. From the backtrace it looks like the new entry/exit
> > code cannot advance past a compiled breakpoint instruction:
> > ~~~
> > PANIC: Fatal exception in interrupt
> It comes from:
> void die(struct pt_regs *regs, ...
> {
> ...
> if (in_interrupt())
>         panic("Fatal exception in interrupt");
> ...
>
> We could add a dump_backtrace to see what happened:
> if (in_interrupt()) {
> +      dump_backtrace(regs, NULL, KERN_DEFAULT);
Sorry, it should be:
+        dump_backtrace(NULL, NULL, KERN_DEFAULT);
We need current stack info, not exception context.


>         panic("Fatal exception in interrupt");
> }
>
>
>
> >
> > Entering kdb (current=0xff60000001a2a280, pid 104) on processor 1 due to NonMaskable Interrupt @ 0xffffffff800bb3c4
> > [1]kdb> bt
> > Stack traceback for pid 104
> > 0xff60000001a2a280      104       92  1    1   R  0xff60000001a2ac50
> > *echo
> > CPU: 1 PID: 104 Comm: echo Tainted: G      D
> > 6.3.0-rc1-00003-gf0bddf50586d #119
> > Hardware name: riscv-virtio,qemu (DT)
> > Call Trace:
> > [<ffffffff800050dc>] dump_backtrace+0x1c/0x24
> > [<ffffffff808458f8>] show_stack+0x2c/0x38
> > [<ffffffff80851b00>] dump_stack_lvl+0x3c/0x54
> > [<ffffffff80851b2c>] dump_stack+0x14/0x1c
> > [<ffffffff800bc4b8>] kdb_dump_stack_on_cpu+0x64/0x66
> > [<ffffffff800c3d2a>] kdb_show_stack+0x82/0x88
> > [<ffffffff800c3dc0>] kdb_bt1+0x90/0xf2
> > [<ffffffff800c4206>] kdb_bt+0x34c/0x384
> > [<ffffffff800c1d28>] kdb_parse+0x27a/0x618
> > [<ffffffff800c2566>] kdb_main_loop+0x3b2/0x8fa
> > [<ffffffff800c4c5a>] kdb_stub+0x1ba/0x3a8
> > [<ffffffff800bbba8>] kgdb_cpu_enter+0x342/0x5ba
> > [<ffffffff800bc3da>] kgdb_handle_exception+0xe0/0x11a
> > [<ffffffff8000810c>] kgdb_riscv_notify+0x86/0xb4
> > [<ffffffff8002f210>] notify_die+0x6a/0xa6
> > [<ffffffff80004db0>] handle_break+0x70/0xe0
> > [<ffffffff80852462>] do_trap_break+0x48/0x5c
> > [<ffffffff80003598>] ret_from_exception+0x0/0x64
> > [<ffffffff800bb3c4>] kgdb_compiled_break+0x0/0x14
> > ~~~
> >
> >
> > Daniel.
>
>
>
> --
> Best Regards
>  Guo Ren



--
Best Regards
 Guo Ren
Daniel Thompson June 30, 2023, 2:50 p.m. UTC | #21
On Fri, Jun 30, 2023 at 07:22:40AM -0400, Guo Ren wrote:
> On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> > <daniel.thompson@linaro.org> wrote:
> > >
> > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > >
> > > > This patch converts riscv to use the generic entry infrastructure from
> > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > codes more elegant. Here are the changes:
> > > >
> > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > >  - Get rid of complex custom signal implementation
> > > >  - Move syscall procedure from assembly to C, which is much more
> > > >    readable.
> > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > >  - Use the standard preemption code instead of custom
> > > >
> > > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > Cc: Ben Hutchings <ben@decadent.org.uk>
> > >
> > > Apologies for the late feedback but I've been swamped lately and only
> > > recently got round to running the full kgdb test suite on the v6.4
> > > series.
> > >
> > > The kgdb test suite includes a couple of tests that verify that the
> > > system resumes after breakpointing due to a BUG():
> > > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> > >
> > > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > > is pointing at this patch. With these changes in place then, after kdb
> > > resumes the system, the BUG() message is printed as normal but then
> > > immediately fails. From the backtrace it looks like the new entry/exit
> > > code cannot advance past a compiled breakpoint instruction:
> > > ~~~
> > > PANIC: Fatal exception in interrupt
> > It comes from:
> > void die(struct pt_regs *regs, ...
> > {
> > ...
> > if (in_interrupt())
> >         panic("Fatal exception in interrupt");
> > ...
> >
> > We could add a dump_backtrace to see what happened:
> > if (in_interrupt()) {
> > +      dump_backtrace(regs, NULL, KERN_DEFAULT);
> Sorry, it should be:
> +        dump_backtrace(NULL, NULL, KERN_DEFAULT);
> We need current stack info, not exception context.

I added this... and I also stopped kgdb from intercepting the panic()
since that interferes with the console output from dump_backtrace().

~~~
# /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
[    3.380565] lkdtm: Performing direct entry BUG

Entering kdb (current=0xff6000000380ab00, pid 98) on processor 0 due to NonMaskable Interrupt @ 0xffffffff8064b844
kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, type go a second time if you really want to continue
kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, attempting to continue
[    3.381411] ------------[ cut here ]------------
[    3.381454] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
[    3.381609] Kernel BUG [#1]
[    3.381632] Modules linked in:
[    3.381734] CPU: 0 PID: 98 Comm: echo Not tainted 6.4.0-rc6-00004-ge6e9d4598760-dirty #126
[    3.381817] Hardware name: riscv-virtio,qemu (DT)
[    3.381885] epc : lkdtm_BUG+0x6/0x8
[    3.381959]  ra : lkdtm_do_action+0x10/0x1c
[    3.381978] epc : ffffffff8064b844 ra : ffffffff8064afb4 sp : ff200000008c3d30
[    3.381991]  gp : ffffffff810665a0 tp : ff6000000380ab00 t0 : 6500000000000000
[    3.382002]  t1 : 0000000000000001 t2 : 6550203a6d74646b s0 : ff200000008c3d40
[    3.382012]  s1 : ff60000003988000 a0 : ffffffff80fc0260 a1 : ff6000003ffad788
[    3.382023]  a2 : ff6000003ffb9530 a3 : 0000000000000000 a4 : 0000000000000000
[    3.382034]  a5 : ffffffff8064b83e a6 : 0000000000000050 a7 : 0000000000040000
[    3.382045]  s2 : 0000000000000004 s3 : ffffffff80fc0260 s4 : ff200000008c3e70
[    3.382056]  s5 : ff600000033223a8 s6 : 00000000000f0cc0 s7 : ff60000002211000
[    3.382066]  s8 : 00ffffffafc50c08 s9 : 00ffffffafc4b9b8 s10: 0000000000000000
[    3.382077]  s11: 0000000000000001 t3 : 461f715700000000 t4 : 0000000000000002
[    3.382087]  t5 : 0000000000000000 t6 : ff200000008c3b58
[    3.382097] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
[    3.382139] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
[    3.382245] Code: 0513 9245 b097 0039 80e7 7f20 bf39 1141 e422 0800 (9002) 1141
[    3.594697] ---[ end trace 0000000000000000 ]---

At this point we expect a shell prompt since we should have taken the BUG(),
killed the echo process and returned to the shell. However in v6.4 we get the
following instead (including the instrumentation you asked for):

[    3.594801] [<ffffffff80005e3a>] dump_backtrace+0x1c/0x24
[    3.594826] [<ffffffff800059f0>] die+0x228/0x238
[    3.594835] [<ffffffff80005b38>] handle_break+0x9a/0xe0
[    3.594843] [<ffffffff809f30d6>] do_trap_break+0x48/0x5c
[    3.594854] [<ffffffff80003ee4>] ret_from_exception+0x0/0x64
[    3.594862] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
[    3.594959] Kernel panic - not syncing: Fatal exception in interrupt
[    3.595005] SMP: stopping secondary CPUs
[    3.596444] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
~~~


Daniel.
Guo Ren July 1, 2023, 2:55 a.m. UTC | #22
On Fri, Jun 30, 2023 at 10:51 PM Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>
> On Fri, Jun 30, 2023 at 07:22:40AM -0400, Guo Ren wrote:
> > On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
> > >
> > > On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> > > <daniel.thompson@linaro.org> wrote:
> > > >
> > > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > >
> > > > > This patch converts riscv to use the generic entry infrastructure from
> > > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > > codes more elegant. Here are the changes:
> > > > >
> > > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > > >  - Get rid of complex custom signal implementation
> > > > >  - Move syscall procedure from assembly to C, which is much more
> > > > >    readable.
> > > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > > >  - Use the standard preemption code instead of custom
> > > > >
> > > > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > > Cc: Ben Hutchings <ben@decadent.org.uk>
> > > >
> > > > Apologies for the late feedback but I've been swamped lately and only
> > > > recently got round to running the full kgdb test suite on the v6.4
> > > > series.
> > > >
> > > > The kgdb test suite includes a couple of tests that verify that the
> > > > system resumes after breakpointing due to a BUG():
> > > > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> > > >
> > > > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > > > is pointing at this patch. With these changes in place then, after kdb
> > > > resumes the system, the BUG() message is printed as normal but then
> > > > immediately fails. From the backtrace it looks like the new entry/exit
> > > > code cannot advance past a compiled breakpoint instruction:
> > > > ~~~
> > > > PANIC: Fatal exception in interrupt
> > > It comes from:
> > > void die(struct pt_regs *regs, ...
> > > {
> > > ...
> > > if (in_interrupt())
> > >         panic("Fatal exception in interrupt");
> > > ...
> > >
> > > We could add a dump_backtrace to see what happened:
> > > if (in_interrupt()) {
> > > +      dump_backtrace(regs, NULL, KERN_DEFAULT);
> > Sorry, it should be:
> > +        dump_backtrace(NULL, NULL, KERN_DEFAULT);
> > We need current stack info, not exception context.
>
> I added this... and I also stopped kgdb from intercepting the panic()
> since that interferes with the console output from dump_backtrace().
>
> ~~~
> # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> [    3.380565] lkdtm: Performing direct entry BUG
>
> Entering kdb (current=0xff6000000380ab00, pid 98) on processor 0 due to NonMaskable Interrupt @ 0xffffffff8064b844
> kdb> go
> Catastrophic error detected
> kdb_continue_catastrophic=0, type go a second time if you really want to continue
> kdb> go
> Catastrophic error detected
> kdb_continue_catastrophic=0, attempting to continue
> [    3.381411] ------------[ cut here ]------------
> [    3.381454] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> [    3.381609] Kernel BUG [#1]
> [    3.381632] Modules linked in:
> [    3.381734] CPU: 0 PID: 98 Comm: echo Not tainted 6.4.0-rc6-00004-ge6e9d4598760-dirty #126
> [    3.381817] Hardware name: riscv-virtio,qemu (DT)
> [    3.381885] epc : lkdtm_BUG+0x6/0x8
> [    3.381959]  ra : lkdtm_do_action+0x10/0x1c
> [    3.381978] epc : ffffffff8064b844 ra : ffffffff8064afb4 sp : ff200000008c3d30
> [    3.381991]  gp : ffffffff810665a0 tp : ff6000000380ab00 t0 : 6500000000000000
> [    3.382002]  t1 : 0000000000000001 t2 : 6550203a6d74646b s0 : ff200000008c3d40
> [    3.382012]  s1 : ff60000003988000 a0 : ffffffff80fc0260 a1 : ff6000003ffad788
> [    3.382023]  a2 : ff6000003ffb9530 a3 : 0000000000000000 a4 : 0000000000000000
> [    3.382034]  a5 : ffffffff8064b83e a6 : 0000000000000050 a7 : 0000000000040000
> [    3.382045]  s2 : 0000000000000004 s3 : ffffffff80fc0260 s4 : ff200000008c3e70
> [    3.382056]  s5 : ff600000033223a8 s6 : 00000000000f0cc0 s7 : ff60000002211000
> [    3.382066]  s8 : 00ffffffafc50c08 s9 : 00ffffffafc4b9b8 s10: 0000000000000000
> [    3.382077]  s11: 0000000000000001 t3 : 461f715700000000 t4 : 0000000000000002
> [    3.382087]  t5 : 0000000000000000 t6 : ff200000008c3b58
> [    3.382097] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> [    3.382139] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> [    3.382245] Code: 0513 9245 b097 0039 80e7 7f20 bf39 1141 e422 0800 (9002) 1141
> [    3.594697] ---[ end trace 0000000000000000 ]---
>
> At this point we expect a shell prompt since we should have taken the BUG(),
> killed the echo process and returned to the shell. However in v6.4 we get the
> following instead (including the instrumentation you asked for):

After comparing with arm64, I found that arm64 uses spinlock_irq to
protect the in_interrupt(). I think this would make in_interrupt() =
0.

So how about trying:

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 5158961ea977..0ac914a99ee3 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -82,13 +82,15 @@ void die(struct pt_regs *regs, const char *str)

        bust_spinlocks(0);
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
-       spin_unlock_irqrestore(&die_lock, flags);
        oops_exit();

        if (in_interrupt())
                panic("Fatal exception in interrupt");
        if (panic_on_oops)
                panic("Fatal exception");
+
+       spin_unlock_irqrestore(&die_lock, flags);
+
        if (ret != NOTIFY_STOP)
                make_task_dead(SIGSEGV);
 }

>
> [    3.594801] [<ffffffff80005e3a>] dump_backtrace+0x1c/0x24
> [    3.594826] [<ffffffff800059f0>] die+0x228/0x238
> [    3.594835] [<ffffffff80005b38>] handle_break+0x9a/0xe0
> [    3.594843] [<ffffffff809f30d6>] do_trap_break+0x48/0x5c
> [    3.594854] [<ffffffff80003ee4>] ret_from_exception+0x0/0x64
> [    3.594862] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> [    3.594959] Kernel panic - not syncing: Fatal exception in interrupt
> [    3.595005] SMP: stopping secondary CPUs
> [    3.596444] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
> ~~~
>
>
> Daniel.
Guo Ren July 1, 2023, 3:08 a.m. UTC | #23
On Sat, Jul 1, 2023 at 10:55 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Fri, Jun 30, 2023 at 10:51 PM Daniel Thompson
> <daniel.thompson@linaro.org> wrote:
> >
> > On Fri, Jun 30, 2023 at 07:22:40AM -0400, Guo Ren wrote:
> > > On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
> > > >
> > > > On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> > > > <daniel.thompson@linaro.org> wrote:
> > > > >
> > > > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > > >
> > > > > > This patch converts riscv to use the generic entry infrastructure from
> > > > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > > > codes more elegant. Here are the changes:
> > > > > >
> > > > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > > > >  - Get rid of complex custom signal implementation
> > > > > >  - Move syscall procedure from assembly to C, which is much more
> > > > > >    readable.
> > > > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > > > >  - Use the standard preemption code instead of custom
> > > > > >
> > > > > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > > > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > > > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > > > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > > > Cc: Ben Hutchings <ben@decadent.org.uk>
> > > > >
> > > > > Apologies for the late feedback but I've been swamped lately and only
> > > > > recently got round to running the full kgdb test suite on the v6.4
> > > > > series.
> > > > >
> > > > > The kgdb test suite includes a couple of tests that verify that the
> > > > > system resumes after breakpointing due to a BUG():
> > > > > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> > > > >
> > > > > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > > > > is pointing at this patch. With these changes in place then, after kdb
> > > > > resumes the system, the BUG() message is printed as normal but then
> > > > > immediately fails. From the backtrace it looks like the new entry/exit
> > > > > code cannot advance past a compiled breakpoint instruction:
> > > > > ~~~
> > > > > PANIC: Fatal exception in interrupt
> > > > It comes from:
> > > > void die(struct pt_regs *regs, ...
> > > > {
> > > > ...
> > > > if (in_interrupt())
> > > >         panic("Fatal exception in interrupt");
> > > > ...
> > > >
> > > > We could add a dump_backtrace to see what happened:
> > > > if (in_interrupt()) {
> > > > +      dump_backtrace(regs, NULL, KERN_DEFAULT);
> > > Sorry, it should be:
> > > +        dump_backtrace(NULL, NULL, KERN_DEFAULT);
> > > We need current stack info, not exception context.
> >
> > I added this... and I also stopped kgdb from intercepting the panic()
> > since that interferes with the console output from dump_backtrace().
> >
> > ~~~
> > # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> > [    3.380565] lkdtm: Performing direct entry BUG
> >
> > Entering kdb (current=0xff6000000380ab00, pid 98) on processor 0 due to NonMaskable Interrupt @ 0xffffffff8064b844
> > kdb> go
> > Catastrophic error detected
> > kdb_continue_catastrophic=0, type go a second time if you really want to continue
> > kdb> go
> > Catastrophic error detected
> > kdb_continue_catastrophic=0, attempting to continue
> > [    3.381411] ------------[ cut here ]------------
> > [    3.381454] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> > [    3.381609] Kernel BUG [#1]
> > [    3.381632] Modules linked in:
> > [    3.381734] CPU: 0 PID: 98 Comm: echo Not tainted 6.4.0-rc6-00004-ge6e9d4598760-dirty #126
> > [    3.381817] Hardware name: riscv-virtio,qemu (DT)
> > [    3.381885] epc : lkdtm_BUG+0x6/0x8
> > [    3.381959]  ra : lkdtm_do_action+0x10/0x1c
> > [    3.381978] epc : ffffffff8064b844 ra : ffffffff8064afb4 sp : ff200000008c3d30
> > [    3.381991]  gp : ffffffff810665a0 tp : ff6000000380ab00 t0 : 6500000000000000
> > [    3.382002]  t1 : 0000000000000001 t2 : 6550203a6d74646b s0 : ff200000008c3d40
> > [    3.382012]  s1 : ff60000003988000 a0 : ffffffff80fc0260 a1 : ff6000003ffad788
> > [    3.382023]  a2 : ff6000003ffb9530 a3 : 0000000000000000 a4 : 0000000000000000
> > [    3.382034]  a5 : ffffffff8064b83e a6 : 0000000000000050 a7 : 0000000000040000
> > [    3.382045]  s2 : 0000000000000004 s3 : ffffffff80fc0260 s4 : ff200000008c3e70
> > [    3.382056]  s5 : ff600000033223a8 s6 : 00000000000f0cc0 s7 : ff60000002211000
> > [    3.382066]  s8 : 00ffffffafc50c08 s9 : 00ffffffafc4b9b8 s10: 0000000000000000
> > [    3.382077]  s11: 0000000000000001 t3 : 461f715700000000 t4 : 0000000000000002
> > [    3.382087]  t5 : 0000000000000000 t6 : ff200000008c3b58
> > [    3.382097] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > [    3.382139] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > [    3.382245] Code: 0513 9245 b097 0039 80e7 7f20 bf39 1141 e422 0800 (9002) 1141
> > [    3.594697] ---[ end trace 0000000000000000 ]---
> >
> > At this point we expect a shell prompt since we should have taken the BUG(),
> > killed the echo process and returned to the shell. However in v6.4 we get the
> > following instead (including the instrumentation you asked for):
>
> After comparing with arm64, I found that arm64 uses spinlock_irq to
> protect the in_interrupt(). I think this would make in_interrupt() =
> 0.
>
> So how about trying:
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index 5158961ea977..0ac914a99ee3 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -82,13 +82,15 @@ void die(struct pt_regs *regs, const char *str)
>
>         bust_spinlocks(0);
>         add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
> -       spin_unlock_irqrestore(&die_lock, flags);
>         oops_exit();
>
>         if (in_interrupt())
>                 panic("Fatal exception in interrupt");
>         if (panic_on_oops)
>                 panic("Fatal exception");
> +
> +       spin_unlock_irqrestore(&die_lock, flags);
Hmm... It seems this is not correct. How can I reproduce your
environment on QEMU? Sorry, I'm not familiar with kgdb.

> +
>         if (ret != NOTIFY_STOP)
>                 make_task_dead(SIGSEGV);
>  }
>
> >
> > [    3.594801] [<ffffffff80005e3a>] dump_backtrace+0x1c/0x24
> > [    3.594826] [<ffffffff800059f0>] die+0x228/0x238
> > [    3.594835] [<ffffffff80005b38>] handle_break+0x9a/0xe0
> > [    3.594843] [<ffffffff809f30d6>] do_trap_break+0x48/0x5c
> > [    3.594854] [<ffffffff80003ee4>] ret_from_exception+0x0/0x64
> > [    3.594862] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > [    3.594959] Kernel panic - not syncing: Fatal exception in interrupt
> > [    3.595005] SMP: stopping secondary CPUs
> > [    3.596444] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
> > ~~~
> >
> >
> > Daniel.
>
>
>
> --
> Best Regards
>  Guo Ren
Guo Ren July 1, 2023, 4:22 a.m. UTC | #24
On Sat, Jul 1, 2023 at 11:08 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Sat, Jul 1, 2023 at 10:55 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Fri, Jun 30, 2023 at 10:51 PM Daniel Thompson
> > <daniel.thompson@linaro.org> wrote:
> > >
> > > On Fri, Jun 30, 2023 at 07:22:40AM -0400, Guo Ren wrote:
> > > > On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
> > > > >
> > > > > On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> > > > > <daniel.thompson@linaro.org> wrote:
> > > > > >
> > > > > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > > > >
> > > > > > > This patch converts riscv to use the generic entry infrastructure from
> > > > > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > > > > codes more elegant. Here are the changes:
> > > > > > >
> > > > > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > > > > >  - Get rid of complex custom signal implementation
> > > > > > >  - Move syscall procedure from assembly to C, which is much more
> > > > > > >    readable.
> > > > > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > > > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > > > > >  - Use the standard preemption code instead of custom
> > > > > > >
> > > > > > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > > > > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > > > > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > > > > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > > > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > > > > Cc: Ben Hutchings <ben@decadent.org.uk>
> > > > > >
> > > > > > Apologies for the late feedback but I've been swamped lately and only
> > > > > > recently got round to running the full kgdb test suite on the v6.4
> > > > > > series.
> > > > > >
> > > > > > The kgdb test suite includes a couple of tests that verify that the
> > > > > > system resumes after breakpointing due to a BUG():
> > > > > > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> > > > > >
> > > > > > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > > > > > is pointing at this patch. With these changes in place then, after kdb
> > > > > > resumes the system, the BUG() message is printed as normal but then
> > > > > > immediately fails. From the backtrace it looks like the new entry/exit
> > > > > > code cannot advance past a compiled breakpoint instruction:
> > > > > > ~~~
> > > > > > PANIC: Fatal exception in interrupt
> > > > > It comes from:
> > > > > void die(struct pt_regs *regs, ...
> > > > > {
> > > > > ...
> > > > > if (in_interrupt())
> > > > >         panic("Fatal exception in interrupt");
> > > > > ...
> > > > >
> > > > > We could add a dump_backtrace to see what happened:
> > > > > if (in_interrupt()) {
> > > > > +      dump_backtrace(regs, NULL, KERN_DEFAULT);
> > > > Sorry, it should be:
> > > > +        dump_backtrace(NULL, NULL, KERN_DEFAULT);
> > > > We need current stack info, not exception context.
> > >
> > > I added this... and I also stopped kgdb from intercepting the panic()
> > > since that interferes with the console output from dump_backtrace().
> > >
> > > ~~~
> > > # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> > > [    3.380565] lkdtm: Performing direct entry BUG
> > >
> > > Entering kdb (current=0xff6000000380ab00, pid 98) on processor 0 due to NonMaskable Interrupt @ 0xffffffff8064b844
> > > kdb> go
> > > Catastrophic error detected
> > > kdb_continue_catastrophic=0, type go a second time if you really want to continue
> > > kdb> go
> > > Catastrophic error detected
> > > kdb_continue_catastrophic=0, attempting to continue
> > > [    3.381411] ------------[ cut here ]------------
> > > [    3.381454] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> > > [    3.381609] Kernel BUG [#1]
> > > [    3.381632] Modules linked in:
> > > [    3.381734] CPU: 0 PID: 98 Comm: echo Not tainted 6.4.0-rc6-00004-ge6e9d4598760-dirty #126
> > > [    3.381817] Hardware name: riscv-virtio,qemu (DT)
> > > [    3.381885] epc : lkdtm_BUG+0x6/0x8
> > > [    3.381959]  ra : lkdtm_do_action+0x10/0x1c
> > > [    3.381978] epc : ffffffff8064b844 ra : ffffffff8064afb4 sp : ff200000008c3d30
> > > [    3.381991]  gp : ffffffff810665a0 tp : ff6000000380ab00 t0 : 6500000000000000
> > > [    3.382002]  t1 : 0000000000000001 t2 : 6550203a6d74646b s0 : ff200000008c3d40
> > > [    3.382012]  s1 : ff60000003988000 a0 : ffffffff80fc0260 a1 : ff6000003ffad788
> > > [    3.382023]  a2 : ff6000003ffb9530 a3 : 0000000000000000 a4 : 0000000000000000
> > > [    3.382034]  a5 : ffffffff8064b83e a6 : 0000000000000050 a7 : 0000000000040000
> > > [    3.382045]  s2 : 0000000000000004 s3 : ffffffff80fc0260 s4 : ff200000008c3e70
> > > [    3.382056]  s5 : ff600000033223a8 s6 : 00000000000f0cc0 s7 : ff60000002211000
> > > [    3.382066]  s8 : 00ffffffafc50c08 s9 : 00ffffffafc4b9b8 s10: 0000000000000000
> > > [    3.382077]  s11: 0000000000000001 t3 : 461f715700000000 t4 : 0000000000000002
> > > [    3.382087]  t5 : 0000000000000000 t6 : ff200000008c3b58
> > > [    3.382097] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > > [    3.382139] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > > [    3.382245] Code: 0513 9245 b097 0039 80e7 7f20 bf39 1141 e422 0800 (9002) 1141
> > > [    3.594697] ---[ end trace 0000000000000000 ]---
> > >
> > > At this point we expect a shell prompt since we should have taken the BUG(),
> > > killed the echo process and returned to the shell. However in v6.4 we get the
> > > following instead (including the instrumentation you asked for):
> >
> > After comparing with arm64, I found that arm64 uses spinlock_irq to
> > protect the in_interrupt(). I think this would make in_interrupt() =
> > 0.
> >
> > So how about trying:
> >
> > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> > index 5158961ea977..0ac914a99ee3 100644
> > --- a/arch/riscv/kernel/traps.c
> > +++ b/arch/riscv/kernel/traps.c
> > @@ -82,13 +82,15 @@ void die(struct pt_regs *regs, const char *str)
> >
> >         bust_spinlocks(0);
> >         add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
> > -       spin_unlock_irqrestore(&die_lock, flags);
> >         oops_exit();
> >
> >         if (in_interrupt())
> >                 panic("Fatal exception in interrupt");
> >         if (panic_on_oops)
> >                 panic("Fatal exception");
> > +
> > +       spin_unlock_irqrestore(&die_lock, flags);
> En... It seems it's not correct, how can I reproduce your environment
> on qemu? Sorry, I'm not familiar with kgdb.

I've reproduced it.
The normal behavior is:
# mount -t debugfs none /sys/kernel/debug/
# /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
[    8.948041] lkdtm: Performing direct entry BUG
[    8.949228] ------------[ cut here ]------------
[    8.949640] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
[    8.950534] Kernel BUG [#1]
[    8.950944] Modules linked in:
[    8.951805] CPU: 0 PID: 106 Comm: echo Not tainted 6.3.0-rc2-00295-gb4e5219985e8 #22
[    8.952831] Hardware name: riscv-virtio,qemu (DT)
[    8.953587] epc : lkdtm_BUG+0x6/0x8
[    8.954232]  ra : lkdtm_do_action+0x14/0x1c
[    8.954713] epc : ffffffff805549e2 ra : ffffffff8087245c sp : ff2000000081bd60
[    8.955378]  gp : ffffffff814ffec0 tp : ff600000023c8000 t0 : 6500000000000000
[    8.956029]  t1 : 000000000000006c t2 : 6550203a6d74646b s0 : ff2000000081bd70
[    8.956699]  s1 : ffffffff814bee50 a0 : ffffffff814bee50 a1 : ff6000001fbd8608
[    8.957381]  a2 : ff6000001fbdb868 a3 : 0000000000000000 a4 : 0000000000000000
[    8.958035]  a5 : ffffffff805549dc a6 : 0000000000000032 a7 : 0000000000000038
[    8.958708]  s2 : 0000000000000004 s3 : 00000000556371a0 s4 : ff2000000081be90
[    8.959397]  s5 : ff60000001c90000 s6 : 00000000556371a0 s7 : 0000000000000030
[    8.960053]  s8 : 000000007fffec78 s9 : 0000000000000007 s10: 0000000055637480
[    8.960717]  s11: 0000000000000001 t3 : ffffffff81512e97 t4 : ffffffff81512e97
[    8.961379]  t5 : ffffffff81512e98 t6 : ff2000000081bba8
[    8.961888] status: 0000000100000120 badaddr: 0000000000000000 cause: 0000000000000003
[    8.962923] [<ffffffff805549e2>] lkdtm_BUG+0x6/0x8
[    8.964194] Code: 0513 d665 7097 0031 80e7 f000 b705 1141 e422 0800 (9002) 1141
[    8.965847] ---[ end trace 0000000000000000 ]---
[    8.966637] note: echo[106] exited with irqs disabled
Segmentation fault
#

After generic_entry:
# mount -t debugfs none /sys/kernel/debug/
# /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
[    8.152247] lkdtm: Performing direct entry BUG
[    8.153652] ------------[ cut here ]------------
[    8.153825] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
[    8.154341] Kernel BUG [#1]
[    8.154440] Modules linked in:
[    8.154918] CPU: 0 PID: 106 Comm: echo Not tainted 6.4.0-rc1-00055-g0ca05a4b079f #21
[    8.155301] Hardware name: riscv-virtio,qemu (DT)
[    8.155581] epc : lkdtm_BUG+0x6/0x8
[    8.155880]  ra : lkdtm_do_action+0x14/0x1c
[    8.155977] epc : ffffffff8059d4b4 ra : ffffffff808c1a84 sp : ff2000000081bd40
[    8.156030]  gp : ffffffff81503c08 tp : ff600000028ebac0 t0 : 6500000000000000
[    8.156079]  t1 : 000000000000006c t2 : 6550203a6d74646b s0 : ff2000000081bd50
[    8.156144]  s1 : ffffffff814c2e88 a0 : ffffffff814c2e88 a1 : ff6000001ffd8608
[    8.156193]  a2 : ff6000001ffdb870 a3 : 0000000000000000 a4 : 0000000000000000
[    8.156241]  a5 : ffffffff8059d4ae a6 : 0000000000000032 a7 : 0000000000000038
[    8.156288]  s2 : 0000000000000004 s3 : 00000000556371a0 s4 : ff2000000081be70
[    8.156335]  s5 : ff60000002090000 s6 : 00000000556371a0 s7 : 0000000000000030
[    8.156382]  s8 : 000000007fffec78 s9 : 0000000000000007 s10: 0000000055637480
[    8.156428]  s11: 0000000000000001 t3 : ffffffff815173d7 t4 : ffffffff815173d7
[    8.156473]  t5 : ffffffff815173d8 t6 : ff2000000081bb88
[    8.156516] status: 0000000100000120 badaddr: 0000000000000000 cause: 0000000000000003
[    8.156830] [<ffffffff8059d4b4>] lkdtm_BUG+0x6/0x8
[    8.157630] Code: 0513 1745 d097 0031 80e7 70a0 b705 1141 e422 0800 (9002) 1141
[    8.169646] ---[ end trace 0000000000000000 ]---
[    8.170148] Kernel panic - not syncing: Fatal exception in interrupt
[    8.171839] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

I'm debugging it and will send a patch soon.

>
> > +
> >         if (ret != NOTIFY_STOP)
> >                 make_task_dead(SIGSEGV);
> >  }
> >
> > >
> > > [    3.594801] [<ffffffff80005e3a>] dump_backtrace+0x1c/0x24
> > > [    3.594826] [<ffffffff800059f0>] die+0x228/0x238
> > > [    3.594835] [<ffffffff80005b38>] handle_break+0x9a/0xe0
> > > [    3.594843] [<ffffffff809f30d6>] do_trap_break+0x48/0x5c
> > > [    3.594854] [<ffffffff80003ee4>] ret_from_exception+0x0/0x64
> > > [    3.594862] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > > [    3.594959] Kernel panic - not syncing: Fatal exception in interrupt
> > > [    3.595005] SMP: stopping secondary CPUs
> > > [    3.596444] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
> > > ~~~
> > >
> > >
> > > Daniel.
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
>
>
>
> --
> Best Regards
>  Guo Ren
Guo Ren July 3, 2023, 3:09 a.m. UTC | #25
On Sat, Jul 1, 2023 at 12:22 PM Guo Ren <guoren@kernel.org> wrote:
>
> On Sat, Jul 1, 2023 at 11:08 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Sat, Jul 1, 2023 at 10:55 AM Guo Ren <guoren@kernel.org> wrote:
> > >
> > > On Fri, Jun 30, 2023 at 10:51 PM Daniel Thompson
> > > <daniel.thompson@linaro.org> wrote:
> > > >
> > > > On Fri, Jun 30, 2023 at 07:22:40AM -0400, Guo Ren wrote:
> > > > > On Fri, Jun 30, 2023 at 7:16 AM Guo Ren <guoren@kernel.org> wrote:
> > > > > >
> > > > > > On Thu, Jun 29, 2023 at 10:02 AM Daniel Thompson
> > > > > > <daniel.thompson@linaro.org> wrote:
> > > > > > >
> > > > > > > On Tue, Feb 21, 2023 at 10:30:18PM -0500, guoren@kernel.org wrote:
> > > > > > > > From: Guo Ren <guoren@linux.alibaba.com>
> > > > > > > >
> > > > > > > > This patch converts riscv to use the generic entry infrastructure from
> > > > > > > > kernel/entry/*. The generic entry makes maintainers' work easier and
> > > > > > > > codes more elegant. Here are the changes:
> > > > > > > >
> > > > > > > >  - More clear entry.S with handle_exception and ret_from_exception
> > > > > > > >  - Get rid of complex custom signal implementation
> > > > > > > >  - Move syscall procedure from assembly to C, which is much more
> > > > > > > >    readable.
> > > > > > > >  - Connect ret_from_fork & ret_from_kernel_thread to generic entry.
> > > > > > > >  - Wrap with irqentry_enter/exit and syscall_enter/exit_from_user_mode
> > > > > > > >  - Use the standard preemption code instead of custom
> > > > > > > >
> > > > > > > > Suggested-by: Huacai Chen <chenhuacai@kernel.org>
> > > > > > > > Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> > > > > > > > Tested-by: Yipeng Zou <zouyipeng@huawei.com>
> > > > > > > > Tested-by: Jisheng Zhang <jszhang@kernel.org>
> > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > > > > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > > > > > Cc: Ben Hutchings <ben@decadent.org.uk>
> > > > > > >
> > > > > > > Apologies for the late feedback but I've been swamped lately and only
> > > > > > > recently got round to running the full kgdb test suite on the v6.4
> > > > > > > series.
> > > > > > >
> > > > > > > The kgdb test suite includes a couple of tests that verify that the
> > > > > > > system resumes after breakpointing due to a BUG():
> > > > > > > https://github.com/daniel-thompson/kgdbtest/blob/master/tests/test_kdb_fault_injection.py#L24-L45
> > > > > > >
> > > > > > > These tests have regressed on riscv between v6.3 and v6.4 and a bisect
> > > > > > > is pointing at this patch. With these changes in place then, after kdb
> > > > > > > resumes the system, the BUG() message is printed as normal but then
> > > > > > > immediately fails. From the backtrace it looks like the new entry/exit
> > > > > > > code cannot advance past a compiled breakpoint instruction:
> > > > > > > ~~~
> > > > > > > PANIC: Fatal exception in interrupt
> > > > > > It comes from:
> > > > > > void die(struct pt_regs *regs, ...
> > > > > > {
> > > > > > ...
> > > > > > if (in_interrupt())
> > > > > >         panic("Fatal exception in interrupt");
> > > > > > ...
> > > > > >
> > > > > > We could add a dump_backtrace to see what happened:
> > > > > > if (in_interrupt()) {
> > > > > > +      dump_backtrace(regs, NULL, KERN_DEFAULT);
> > > > > Sorry, it should be:
> > > > > +        dump_backtrace(NULL, NULL, KERN_DEFAULT);
> > > > > We need current stack info, not exception context.
> > > >
> > > > I added this... and I also stopped kgdb from intercepting the panic()
> > > > since that interferes with the console output from dump_backtrace().
> > > >
> > > > ~~~
> > > > # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> > > > [    3.380565] lkdtm: Performing direct entry BUG
> > > >
> > > > Entering kdb (current=0xff6000000380ab00, pid 98) on processor 0 due to NonMaskable Interrupt @ 0xffffffff8064b844
> > > > kdb> go
> > > > Catastrophic error detected
> > > > kdb_continue_catastrophic=0, type go a second time if you really want to continue
> > > > kdb> go
> > > > Catastrophic error detected
> > > > kdb_continue_catastrophic=0, attempting to continue
> > > > [    3.381411] ------------[ cut here ]------------
> > > > [    3.381454] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> > > > [    3.381609] Kernel BUG [#1]
> > > > [    3.381632] Modules linked in:
> > > > [    3.381734] CPU: 0 PID: 98 Comm: echo Not tainted 6.4.0-rc6-00004-ge6e9d4598760-dirty #126
> > > > [    3.381817] Hardware name: riscv-virtio,qemu (DT)
> > > > [    3.381885] epc : lkdtm_BUG+0x6/0x8
> > > > [    3.381959]  ra : lkdtm_do_action+0x10/0x1c
> > > > [    3.381978] epc : ffffffff8064b844 ra : ffffffff8064afb4 sp : ff200000008c3d30
> > > > [    3.381991]  gp : ffffffff810665a0 tp : ff6000000380ab00 t0 : 6500000000000000
> > > > [    3.382002]  t1 : 0000000000000001 t2 : 6550203a6d74646b s0 : ff200000008c3d40
> > > > [    3.382012]  s1 : ff60000003988000 a0 : ffffffff80fc0260 a1 : ff6000003ffad788
> > > > [    3.382023]  a2 : ff6000003ffb9530 a3 : 0000000000000000 a4 : 0000000000000000
> > > > [    3.382034]  a5 : ffffffff8064b83e a6 : 0000000000000050 a7 : 0000000000040000
> > > > [    3.382045]  s2 : 0000000000000004 s3 : ffffffff80fc0260 s4 : ff200000008c3e70
> > > > [    3.382056]  s5 : ff600000033223a8 s6 : 00000000000f0cc0 s7 : ff60000002211000
> > > > [    3.382066]  s8 : 00ffffffafc50c08 s9 : 00ffffffafc4b9b8 s10: 0000000000000000
> > > > [    3.382077]  s11: 0000000000000001 t3 : 461f715700000000 t4 : 0000000000000002
> > > > [    3.382087]  t5 : 0000000000000000 t6 : ff200000008c3b58
> > > > [    3.382097] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > > > [    3.382139] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > > > [    3.382245] Code: 0513 9245 b097 0039 80e7 7f20 bf39 1141 e422 0800 (9002) 1141
> > > > [    3.594697] ---[ end trace 0000000000000000 ]---
> > > >
> > > > At this point we expect a shell prompt since we should have taken the BUG(),
> > > > killed the echo process and returned to the shell. However in v6.4 we get the
> > > > following instead (including the instrumentation you asked for):
> > >
> > > After comparing with arm64, I found that arm64 uses spinlock_irq to
> > > protect the in_interrupt(). I think this would make in_interrupt() =
> > > 0.
> > >
> > > So how about trying:
> > >
> > > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> > > index 5158961ea977..0ac914a99ee3 100644
> > > --- a/arch/riscv/kernel/traps.c
> > > +++ b/arch/riscv/kernel/traps.c
> > > @@ -82,13 +82,15 @@ void die(struct pt_regs *regs, const char *str)
> > >
> > >         bust_spinlocks(0);
> > >         add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
> > > -       spin_unlock_irqrestore(&die_lock, flags);
> > >         oops_exit();
> > >
> > >         if (in_interrupt())
> > >                 panic("Fatal exception in interrupt");
> > >         if (panic_on_oops)
> > >                 panic("Fatal exception");
> > > +
> > > +       spin_unlock_irqrestore(&die_lock, flags);
> > En... It seems it's not correct, how can I reproduce your environment
> > on qemu? Sorry, I'm not familiar with kgdb.
>
> I got it:
> Normal is:
> # mount -t debugfs none /sys/kernel/debug/
> # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> [    8.948041] lkdtm: Performing direct entry BUG
> [    8.949228] ------------[ cut here ]------------
> [    8.949640] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> [    8.950534] Kernel BUG [#1]
> [    8.950944] Modules linked in:
> [    8.951805] CPU: 0 PID: 106 Comm: echo Not tainted
> 6.3.0-rc2-00295-gb4e5219985e8 #22
> [    8.952831] Hardware name: riscv-virtio,qemu (DT)
> [    8.953587] epc : lkdtm_BUG+0x6/0x8
> [    8.954232]  ra : lkdtm_do_action+0x14/0x1c
> [    8.954713] epc : ffffffff805549e2 ra : ffffffff8087245c sp :
> ff2000000081bd60
> [    8.955378]  gp : ffffffff814ffec0 tp : ff600000023c8000 t0 :
> 6500000000000000
> [    8.956029]  t1 : 000000000000006c t2 : 6550203a6d74646b s0 :
> ff2000000081bd70
> [    8.956699]  s1 : ffffffff814bee50 a0 : ffffffff814bee50 a1 :
> ff6000001fbd8608
> [    8.957381]  a2 : ff6000001fbdb868 a3 : 0000000000000000 a4 :
> 0000000000000000
> [    8.958035]  a5 : ffffffff805549dc a6 : 0000000000000032 a7 :
> 0000000000000038
> [    8.958708]  s2 : 0000000000000004 s3 : 00000000556371a0 s4 :
> ff2000000081be90
> [    8.959397]  s5 : ff60000001c90000 s6 : 00000000556371a0 s7 :
> 0000000000000030
> [    8.960053]  s8 : 000000007fffec78 s9 : 0000000000000007 s10:
> 0000000055637480
> [    8.960717]  s11: 0000000000000001 t3 : ffffffff81512e97 t4 :
> ffffffff81512e97
> [    8.961379]  t5 : ffffffff81512e98 t6 : ff2000000081bba8
> [    8.961888] status: 0000000100000120 badaddr: 0000000000000000
> cause: 0000000000000003
> [    8.962923] [<ffffffff805549e2>] lkdtm_BUG+0x6/0x8
> [    8.964194] Code: 0513 d665 7097 0031 80e7 f000 b705 1141 e422 0800
> (9002) 1141
> [    8.965847] ---[ end trace 0000000000000000 ]---
> [    8.966637] note: echo[106] exited with irqs disabled
> Segmentation fault
> #
>
> After generic_entry:
> # mount -t debugfs none /sys/kernel/debug/
> # /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT
> [    8.152247] lkdtm: Performing direct entry BUG
> [    8.153652] ------------[ cut here ]------------
> [    8.153825] kernel BUG at drivers/misc/lkdtm/bugs.c:78!
> [    8.154341] Kernel BUG [#1]
> [    8.154440] Modules linked in:
> [    8.154918] CPU: 0 PID: 106 Comm: echo Not tainted
> 6.4.0-rc1-00055-g0ca05a4b079f #21
> [    8.155301] Hardware name: riscv-virtio,qemu (DT)
> [    8.155581] epc : lkdtm_BUG+0x6/0x8
> [    8.155880]  ra : lkdtm_do_action+0x14/0x1c
> [    8.155977] epc : ffffffff8059d4b4 ra : ffffffff808c1a84 sp :
> ff2000000081bd40
> [    8.156030]  gp : ffffffff81503c08 tp : ff600000028ebac0 t0 :
> 6500000000000000
> [    8.156079]  t1 : 000000000000006c t2 : 6550203a6d74646b s0 :
> ff2000000081bd50
> [    8.156144]  s1 : ffffffff814c2e88 a0 : ffffffff814c2e88 a1 :
> ff6000001ffd8608
> [    8.156193]  a2 : ff6000001ffdb870 a3 : 0000000000000000 a4 :
> 0000000000000000
> [    8.156241]  a5 : ffffffff8059d4ae a6 : 0000000000000032 a7 :
> 0000000000000038
> [    8.156288]  s2 : 0000000000000004 s3 : 00000000556371a0 s4 :
> ff2000000081be70
> [    8.156335]  s5 : ff60000002090000 s6 : 00000000556371a0 s7 :
> 0000000000000030
> [    8.156382]  s8 : 000000007fffec78 s9 : 0000000000000007 s10:
> 0000000055637480
> [    8.156428]  s11: 0000000000000001 t3 : ffffffff815173d7 t4 :
> ffffffff815173d7
> [    8.156473]  t5 : ffffffff815173d8 t6 : ff2000000081bb88
> [    8.156516] status: 0000000100000120 badaddr: 0000000000000000
> cause: 0000000000000003
> [    8.156830] [<ffffffff8059d4b4>] lkdtm_BUG+0x6/0x8
> [    8.157630] Code: 0513 1745 d097 0031 80e7 70a0 b705 1141 e422 0800
> (9002) 1141
> [    8.169646] ---[ end trace 0000000000000000 ]---
> [    8.170148] Kernel panic - not syncing: Fatal exception in interrupt
> [    8.171839] ---[ end Kernel panic - not syncing: Fatal exception in
> interrupt ]---
>
> I'm debugging on it, and soon give the patch.
Thanks for the report, Daniel; here is the fixup:
https://lore.kernel.org/linux-riscv/20230702025708.784106-1-guoren@kernel.org/

I've tested it, and it works now.
# /bin/echo BUG > /sys/kernel/debug/provoke-crash/DIRECT

>
> >
> > > +
> > >         if (ret != NOTIFY_STOP)
> > >                 make_task_dead(SIGSEGV);
> > >  }
> > >
> > > >
> > > > [    3.594801] [<ffffffff80005e3a>] dump_backtrace+0x1c/0x24
> > > > [    3.594826] [<ffffffff800059f0>] die+0x228/0x238
> > > > [    3.594835] [<ffffffff80005b38>] handle_break+0x9a/0xe0
> > > > [    3.594843] [<ffffffff809f30d6>] do_trap_break+0x48/0x5c
> > > > [    3.594854] [<ffffffff80003ee4>] ret_from_exception+0x0/0x64
> > > > [    3.594862] [<ffffffff8064b844>] lkdtm_BUG+0x6/0x8
> > > > [    3.594959] Kernel panic - not syncing: Fatal exception in interrupt
> > > > [    3.595005] SMP: stopping secondary CPUs
> > > > [    3.596444] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
> > > > ~~~
> > > >
> > > >
> > > > Daniel.
> > >
> > >
> > >
> > > --
> > > Best Regards
> > >  Guo Ren
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
>
>
>
> --
> Best Regards
>  Guo Ren
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 81eb031887d2..7e27c8c01ae8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,6 +61,7 @@  config RISCV
 	select GENERIC_ATOMIC64 if !64BIT
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP if MMU
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index ef386fcf3939..61ba8ed43d8f 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -27,5 +27,7 @@  DECLARE_DO_ERROR_INFO(do_trap_break);
 
 asmlinkage unsigned long get_overflow_stack(void);
 asmlinkage void handle_bad_stack(struct pt_regs *regs);
+asmlinkage void do_page_fault(struct pt_regs *regs);
+asmlinkage void do_irq(struct pt_regs *regs);
 
 #endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0e571f6483d9..7c2b8cdb7b77 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,7 +40,6 @@ 
 #define SR_UXL		_AC(0x300000000, UL) /* XLEN mask for U-mode */
 #define SR_UXL_32	_AC(0x100000000, UL) /* XLEN = 32 for U-mode */
 #define SR_UXL_64	_AC(0x200000000, UL) /* XLEN = 64 for U-mode */
-#define SR_UXL_SHIFT	32
 #endif
 
 /* SATP flags */
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
new file mode 100644
index 000000000000..6e4dee49d84b
--- /dev/null
+++ b/arch/riscv/include/asm/entry-common.h
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_ENTRY_COMMON_H
+#define _ASM_RISCV_ENTRY_COMMON_H
+
+#include <asm/stacktrace.h>
+
+void handle_page_fault(struct pt_regs *regs);
+void handle_break(struct pt_regs *regs);
+
+#endif /* _ASM_RISCV_ENTRY_COMMON_H */
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 6ecd461129d2..b5b0adcc85c1 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -53,6 +53,9 @@  struct pt_regs {
 	unsigned long orig_a0;
 };
 
+#define PTRACE_SYSEMU			0x1f
+#define PTRACE_SYSEMU_SINGLESTEP	0x20
+
 #ifdef CONFIG_64BIT
 #define REG_FMT "%016lx"
 #else
@@ -121,8 +124,6 @@  extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer);
-int do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_exit(struct pt_regs *regs);
 
 /**
  * regs_get_register() - get register value from its offset
@@ -172,6 +173,11 @@  static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
 	return 0;
 }
 
+static inline int regs_irqs_disabled(struct pt_regs *regs)
+{
+	return !(regs->status & SR_PIE);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PTRACE_H */
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
index 3450c1912afd..f7e8ef2418b9 100644
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -16,4 +16,9 @@  extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *re
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
 			   const char *loglvl);
 
+static inline bool on_thread_stack(void)
+{
+	return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+}
+
 #endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 384a63b86420..736110e1fd78 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -74,5 +74,26 @@  static inline int syscall_get_arch(struct task_struct *task)
 #endif
 }
 
+typedef long (*syscall_t)(ulong, ulong, ulong, ulong, ulong, ulong, ulong);
+static inline void syscall_handler(struct pt_regs *regs, ulong syscall)
+{
+	syscall_t fn;
+
+#ifdef CONFIG_COMPAT
+	if ((regs->status & SR_UXL) == SR_UXL_32)
+		fn = compat_sys_call_table[syscall];
+	else
+#endif
+		fn = sys_call_table[syscall];
+
+	regs->a0 = fn(regs->orig_a0, regs->a1, regs->a2,
+		      regs->a3, regs->a4, regs->a5, regs->a6);
+}
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	return false;
+}
+
 asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
 #endif	/* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index f704c8dd57e0..e0d202134b44 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -67,6 +67,7 @@  struct thread_info {
 	long			kernel_sp;	/* Kernel stack pointer */
 	long			user_sp;	/* User stack pointer */
 	int			cpu;
+	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
 };
 
 /*
@@ -89,26 +90,18 @@  struct thread_info {
  * - pending work-to-be-done flags are in lowest half-word
  * - other flags in upper half-word(s)
  */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_RESTORE_SIGMASK	4	/* restore signal mask in do_signal() */
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
-#define TIF_SYSCALL_TRACEPOINT  6       /* syscall tracepoint instrumentation */
-#define TIF_SYSCALL_AUDIT	7	/* syscall auditing */
-#define TIF_SECCOMP		8	/* syscall secure computing */
 #define TIF_NOTIFY_SIGNAL	9	/* signal notifications exist */
 #define TIF_UPROBE		10	/* uprobe breakpoint or singlestep */
 #define TIF_32BIT		11	/* compat-mode 32bit process */
 
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 
@@ -116,8 +109,4 @@  struct thread_info {
 	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 	 _TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
 
-#define _TIF_SYSCALL_WORK \
-	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
-	 _TIF_SECCOMP)
-
 #endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 99d38fdf8b18..bc322f92ba34 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -14,11 +14,7 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/errata_list.h>
 
-#if !IS_ENABLED(CONFIG_PREEMPTION)
-.set resume_kernel, restore_all
-#endif
-
-ENTRY(handle_exception)
+SYM_CODE_START(handle_exception)
 	/*
 	 * If coming from userspace, preserve the user thread pointer and load
 	 * the kernel thread pointer.  If we came from the kernel, the scratch
@@ -106,19 +102,8 @@  _save_context:
 .option norelax
 	la gp, __global_pointer$
 .option pop
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-	call __trace_hardirqs_off
-#endif
-
-#ifdef CONFIG_CONTEXT_TRACKING_USER
-	/* If previous state is in user mode, call user_exit_callable(). */
-	li   a0, SR_PP
-	and a0, s1, a0
-	bnez a0, skip_context_tracking
-	call user_exit_callable
-skip_context_tracking:
-#endif
+	move a0, sp /* pt_regs */
+	la ra, ret_from_exception
 
 	/*
 	 * MSB of cause differentiates between
@@ -126,38 +111,13 @@  skip_context_tracking:
 	 */
 	bge s4, zero, 1f
 
-	la ra, ret_from_exception
-
 	/* Handle interrupts */
-	move a0, sp /* pt_regs */
-	la a1, generic_handle_arch_irq
-	jr a1
-1:
-	/*
-	 * Exceptions run with interrupts enabled or disabled depending on the
-	 * state of SR_PIE in m/sstatus.
-	 */
-	andi t0, s1, SR_PIE
-	beqz t0, 1f
-	/* kprobes, entered via ebreak, must have interrupts disabled. */
-	li t0, EXC_BREAKPOINT
-	beq s4, t0, 1f
-#ifdef CONFIG_TRACE_IRQFLAGS
-	call __trace_hardirqs_on
-#endif
-	csrs CSR_STATUS, SR_IE
-
+	tail do_irq
 1:
-	la ra, ret_from_exception
-	/* Handle syscalls */
-	li t0, EXC_SYSCALL
-	beq s4, t0, handle_syscall
-
 	/* Handle other exceptions */
 	slli t0, s4, RISCV_LGPTR
 	la t1, excp_vect_table
 	la t2, excp_vect_table_end
-	move a0, sp /* pt_regs */
 	add t0, t1, t0
 	/* Check if exception code lies within bounds */
 	bgeu t0, t2, 1f
@@ -165,95 +125,17 @@  skip_context_tracking:
 	jr t0
 1:
 	tail do_trap_unknown
+SYM_CODE_END(handle_exception)
 
-handle_syscall:
-#ifdef CONFIG_RISCV_M_MODE
-	/*
-	 * When running is M-Mode (no MMU config), MPIE does not get set.
-	 * As a result, we need to force enable interrupts here because
-	 * handle_exception did not do set SR_IE as it always sees SR_PIE
-	 * being cleared.
-	 */
-	csrs CSR_STATUS, SR_IE
-#endif
-#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER)
-	/* Recover a0 - a7 for system calls */
-	REG_L a0, PT_A0(sp)
-	REG_L a1, PT_A1(sp)
-	REG_L a2, PT_A2(sp)
-	REG_L a3, PT_A3(sp)
-	REG_L a4, PT_A4(sp)
-	REG_L a5, PT_A5(sp)
-	REG_L a6, PT_A6(sp)
-	REG_L a7, PT_A7(sp)
-#endif
-	 /* save the initial A0 value (needed in signal handlers) */
-	REG_S a0, PT_ORIG_A0(sp)
-	/*
-	 * Advance SEPC to avoid executing the original
-	 * scall instruction on sret
-	 */
-	addi s2, s2, 0x4
-	REG_S s2, PT_EPC(sp)
-	/* Trace syscalls, but only if requested by the user. */
-	REG_L t0, TASK_TI_FLAGS(tp)
-	andi t0, t0, _TIF_SYSCALL_WORK
-	bnez t0, handle_syscall_trace_enter
-check_syscall_nr:
-	/* Check to make sure we don't jump to a bogus syscall number. */
-	li t0, __NR_syscalls
-	la s0, sys_ni_syscall
-	/*
-	 * Syscall number held in a7.
-	 * If syscall number is above allowed value, redirect to ni_syscall.
-	 */
-	bgeu a7, t0, 3f
-#ifdef CONFIG_COMPAT
-	REG_L s0, PT_STATUS(sp)
-	srli s0, s0, SR_UXL_SHIFT
-	andi s0, s0, (SR_UXL >> SR_UXL_SHIFT)
-	li t0, (SR_UXL_32 >> SR_UXL_SHIFT)
-	sub t0, s0, t0
-	bnez t0, 1f
-
-	/* Call compat_syscall */
-	la s0, compat_sys_call_table
-	j 2f
-1:
-#endif
-	/* Call syscall */
-	la s0, sys_call_table
-2:
-	slli t0, a7, RISCV_LGPTR
-	add s0, s0, t0
-	REG_L s0, 0(s0)
-3:
-	jalr s0
-
-ret_from_syscall:
-	/* Set user a0 to kernel a0 */
-	REG_S a0, PT_A0(sp)
-	/*
-	 * We didn't execute the actual syscall.
-	 * Seccomp already set return value for the current task pt_regs.
-	 * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
-	 */
-ret_from_syscall_rejected:
-#ifdef CONFIG_DEBUG_RSEQ
-	move a0, sp
-	call rseq_syscall
-#endif
-	/* Trace syscalls, but only if requested by the user. */
-	REG_L t0, TASK_TI_FLAGS(tp)
-	andi t0, t0, _TIF_SYSCALL_WORK
-	bnez t0, handle_syscall_trace_exit
-
+/*
+ * ret_from_exception must be called with interrupts disabled. Here is the
+ * caller list:
+ *  - handle_exception
+ *  - ret_from_fork
+ *  - ret_from_kernel_thread
+ */
 SYM_CODE_START_NOALIGN(ret_from_exception)
 	REG_L s0, PT_STATUS(sp)
-	csrc CSR_STATUS, SR_IE
-#ifdef CONFIG_TRACE_IRQFLAGS
-	call __trace_hardirqs_off
-#endif
 #ifdef CONFIG_RISCV_M_MODE
 	/* the MPP value is too large to be used as an immediate arg for addi */
 	li t0, SR_MPP
@@ -261,17 +143,7 @@  SYM_CODE_START_NOALIGN(ret_from_exception)
 #else
 	andi s0, s0, SR_SPP
 #endif
-	bnez s0, resume_kernel
-SYM_CODE_END(ret_from_exception)
-
-	/* Interrupts must be disabled here so flags are checked atomically */
-	REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */
-	andi s1, s0, _TIF_WORK_MASK
-	bnez s1, resume_userspace_slow
-resume_userspace:
-#ifdef CONFIG_CONTEXT_TRACKING_USER
-	call user_enter_callable
-#endif
+	bnez s0, 1f
 
 	/* Save unwound kernel stack pointer in thread_info */
 	addi s0, sp, PT_SIZE_ON_STACK
@@ -282,18 +154,7 @@  resume_userspace:
 	 * structures again.
 	 */
 	csrw CSR_SCRATCH, tp
-
-restore_all:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	REG_L s1, PT_STATUS(sp)
-	andi t0, s1, SR_PIE
-	beqz t0, 1f
-	call __trace_hardirqs_on
-	j 2f
 1:
-	call __trace_hardirqs_off
-2:
-#endif
 	REG_L a0, PT_STATUS(sp)
 	/*
 	 * The current load reservation is effectively part of the processor's
@@ -356,47 +217,10 @@  restore_all:
 #else
 	sret
 #endif
-
-#if IS_ENABLED(CONFIG_PREEMPTION)
-resume_kernel:
-	REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
-	bnez s0, restore_all
-	REG_L s0, TASK_TI_FLAGS(tp)
-	andi s0, s0, _TIF_NEED_RESCHED
-	beqz s0, restore_all
-	call preempt_schedule_irq
-	j restore_all
-#endif
-
-resume_userspace_slow:
-	/* Enter slow path for supplementary processing */
-	move a0, sp /* pt_regs */
-	move a1, s0 /* current_thread_info->flags */
-	call do_work_pending
-	j resume_userspace
-
-/* Slow paths for ptrace. */
-handle_syscall_trace_enter:
-	move a0, sp
-	call do_syscall_trace_enter
-	move t0, a0
-	REG_L a0, PT_A0(sp)
-	REG_L a1, PT_A1(sp)
-	REG_L a2, PT_A2(sp)
-	REG_L a3, PT_A3(sp)
-	REG_L a4, PT_A4(sp)
-	REG_L a5, PT_A5(sp)
-	REG_L a6, PT_A6(sp)
-	REG_L a7, PT_A7(sp)
-	bnez t0, ret_from_syscall_rejected
-	j check_syscall_nr
-handle_syscall_trace_exit:
-	move a0, sp
-	call do_syscall_trace_exit
-	j ret_from_exception
+SYM_CODE_END(ret_from_exception)
 
 #ifdef CONFIG_VMAP_STACK
-handle_kernel_stack_overflow:
+SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
 	/*
 	 * Takes the psuedo-spinlock for the shadow stack, in case multiple
 	 * harts are concurrently overflowing their kernel stacks.  We could
@@ -505,23 +329,25 @@  restore_caller_reg:
 	REG_S s5, PT_TP(sp)
 	move a0, sp
 	tail handle_bad_stack
+SYM_CODE_END(handle_kernel_stack_overflow)
 #endif
 
-END(handle_exception)
-
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
+	call schedule_tail
+	move a0, sp /* pt_regs */
 	la ra, ret_from_exception
-	tail schedule_tail
-ENDPROC(ret_from_fork)
+	tail syscall_exit_to_user_mode
+SYM_CODE_END(ret_from_fork)
 
-ENTRY(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_kernel_thread)
 	call schedule_tail
 	/* Call fn(arg) */
-	la ra, ret_from_exception
 	move a0, s1
-	jr s0
-ENDPROC(ret_from_kernel_thread)
-
+	jalr s0
+	move a0, sp /* pt_regs */
+	la ra, ret_from_exception
+	tail syscall_exit_to_user_mode
+SYM_CODE_END(ret_from_kernel_thread)
 
 /*
  * Integer register context switch
@@ -533,7 +359,7 @@  ENDPROC(ret_from_kernel_thread)
  * The value of a0 and a1 must be preserved by this function, as that's how
  * arguments are passed to schedule_tail.
  */
-ENTRY(__switch_to)
+SYM_FUNC_START(__switch_to)
 	/* Save context into prev->thread */
 	li    a4,  TASK_THREAD_RA
 	add   a3, a0, a4
@@ -570,7 +396,7 @@  ENTRY(__switch_to)
 	/* The offset of thread_info in task_struct is zero. */
 	move tp, a1
 	ret
-ENDPROC(__switch_to)
+SYM_FUNC_END(__switch_to)
 
 #ifndef CONFIG_MMU
 #define do_page_fault do_trap_unknown
@@ -579,7 +405,7 @@  ENDPROC(__switch_to)
 	.section ".rodata"
 	.align LGREG
 	/* Exception vector table */
-ENTRY(excp_vect_table)
+SYM_CODE_START(excp_vect_table)
 	RISCV_PTR do_trap_insn_misaligned
 	ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
 	RISCV_PTR do_trap_insn_illegal
@@ -588,7 +414,7 @@  ENTRY(excp_vect_table)
 	RISCV_PTR do_trap_load_fault
 	RISCV_PTR do_trap_store_misaligned
 	RISCV_PTR do_trap_store_fault
-	RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */
+	RISCV_PTR do_trap_ecall_u /* system call */
 	RISCV_PTR do_trap_ecall_s
 	RISCV_PTR do_trap_unknown
 	RISCV_PTR do_trap_ecall_m
@@ -598,11 +424,11 @@  ENTRY(excp_vect_table)
 	RISCV_PTR do_trap_unknown
 	RISCV_PTR do_page_fault   /* store page fault */
 excp_vect_table_end:
-END(excp_vect_table)
+SYM_CODE_END(excp_vect_table)
 
 #ifndef CONFIG_MMU
-ENTRY(__user_rt_sigreturn)
+SYM_CODE_START(__user_rt_sigreturn)
 	li a7, __NR_rt_sigreturn
 	scall
-END(__user_rt_sigreturn)
+SYM_CODE_END(__user_rt_sigreturn)
 #endif
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 726731ada534..a556fdaafed9 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -10,7 +10,6 @@ 
 
 extern atomic_t hart_lottery;
 
-asmlinkage void do_page_fault(struct pt_regs *regs);
 asmlinkage void __init setup_vm(uintptr_t dtb_pa);
 #ifdef CONFIG_XIP_KERNEL
 asmlinkage void __init __copy_data(void);
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 44f4b1ca315d..23c48b14a0e7 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -19,9 +19,6 @@ 
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 enum riscv_regset {
 	REGSET_X,
 #ifdef CONFIG_FPU
@@ -228,46 +225,6 @@  long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-/*
- * Allows PTRACE_SYSCALL to work.  These are called from entry.S in
- * {handle,ret_from}_syscall.
- */
-__visible int do_syscall_trace_enter(struct pt_regs *regs)
-{
-	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		if (ptrace_report_syscall_entry(regs))
-			return -1;
-
-	/*
-	 * Do the secure computing after ptrace; failures should be fast.
-	 * If this fails we might have return value in a0 from seccomp
-	 * (via SECCOMP_RET_ERRNO/TRACE).
-	 */
-	if (secure_computing() == -1)
-		return -1;
-
-#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
-		trace_sys_enter(regs, syscall_get_nr(current, regs));
-#endif
-
-	audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
-	return 0;
-}
-
-__visible void do_syscall_trace_exit(struct pt_regs *regs)
-{
-	audit_syscall_exit(regs);
-
-	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		ptrace_report_syscall_exit(regs, 0);
-
-#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
-		trace_sys_exit(regs, regs_return_value(regs));
-#endif
-}
-
 #ifdef CONFIG_COMPAT
 static int compat_riscv_gpr_get(struct task_struct *target,
 				const struct user_regset *regset,
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index bfb2afa4135f..2e365084417e 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -12,6 +12,7 @@ 
 #include <linux/syscalls.h>
 #include <linux/resume_user_mode.h>
 #include <linux/linkage.h>
+#include <linux/entry-common.h>
 
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
@@ -274,7 +275,7 @@  static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	signal_setup_done(ret, ksig, 0);
 }
 
-static void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
 {
 	struct ksignal ksig;
 
@@ -311,29 +312,3 @@  static void do_signal(struct pt_regs *regs)
 	 */
 	restore_saved_sigmask();
 }
-
-/*
- * Handle any pending work on the resume-to-userspace path, as indicated by
- * _TIF_WORK_MASK. Entered from assembly with IRQs off.
- */
-asmlinkage __visible void do_work_pending(struct pt_regs *regs,
-					  unsigned long thread_info_flags)
-{
-	do {
-		if (thread_info_flags & _TIF_NEED_RESCHED) {
-			schedule();
-		} else {
-			local_irq_enable();
-			if (thread_info_flags & _TIF_UPROBE)
-				uprobe_notify_resume(regs);
-			/* Handle pending signal delivery */
-			if (thread_info_flags & (_TIF_SIGPENDING |
-						 _TIF_NOTIFY_SIGNAL))
-				do_signal(regs);
-			if (thread_info_flags & _TIF_NOTIFY_RESUME)
-				resume_user_mode_work(regs);
-		}
-		local_irq_disable();
-		thread_info_flags = read_thread_flags();
-	} while (thread_info_flags & _TIF_WORK_MASK);
-}
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 02727a4fe8be..1f4e37be7eb3 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -17,12 +17,14 @@ 
 #include <linux/module.h>
 #include <linux/irq.h>
 #include <linux/kexec.h>
+#include <linux/entry-common.h>
 
 #include <asm/asm-prototypes.h>
 #include <asm/bug.h>
 #include <asm/csr.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
+#include <asm/syscall.h>
 #include <asm/thread_info.h>
 
 int show_unhandled_signals = 1;
@@ -123,10 +125,18 @@  static void do_trap_error(struct pt_regs *regs, int signo, int code,
 #else
 #define __trap_section noinstr
 #endif
-#define DO_ERROR_INFO(name, signo, code, str)				\
-asmlinkage __visible __trap_section void name(struct pt_regs *regs)	\
-{									\
-	do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
+#define DO_ERROR_INFO(name, signo, code, str)					\
+asmlinkage __visible __trap_section void name(struct pt_regs *regs)		\
+{										\
+	if (user_mode(regs)) {							\
+		irqentry_enter_from_user_mode(regs);				\
+		do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
+		irqentry_exit_to_user_mode(regs);				\
+	} else {								\
+		irqentry_state_t state = irqentry_nmi_enter(regs);		\
+		do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
+		irqentry_nmi_exit(regs, state);					\
+	}									\
 }
 
 DO_ERROR_INFO(do_trap_unknown,
@@ -148,26 +158,50 @@  DO_ERROR_INFO(do_trap_store_misaligned,
 int handle_misaligned_load(struct pt_regs *regs);
 int handle_misaligned_store(struct pt_regs *regs);
 
-asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
 {
-	if (!handle_misaligned_load(regs))
-		return;
-	do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
-		      "Oops - load address misaligned");
+	if (user_mode(regs)) {
+		irqentry_enter_from_user_mode(regs);
+
+		if (handle_misaligned_load(regs))
+			do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+			      "Oops - load address misaligned");
+
+		irqentry_exit_to_user_mode(regs);
+	} else {
+		irqentry_state_t state = irqentry_nmi_enter(regs);
+
+		if (handle_misaligned_load(regs))
+			do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+			      "Oops - load address misaligned");
+
+		irqentry_nmi_exit(regs, state);
+	}
 }
 
-asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs)
 {
-	if (!handle_misaligned_store(regs))
-		return;
-	do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
-		      "Oops - store (or AMO) address misaligned");
+	if (user_mode(regs)) {
+		irqentry_enter_from_user_mode(regs);
+
+		if (handle_misaligned_store(regs))
+			do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+				"Oops - store (or AMO) address misaligned");
+
+		irqentry_exit_to_user_mode(regs);
+	} else {
+		irqentry_state_t state = irqentry_nmi_enter(regs);
+
+		if (handle_misaligned_store(regs))
+			do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+				"Oops - store (or AMO) address misaligned");
+
+		irqentry_nmi_exit(regs, state);
+	}
 }
 #endif
 DO_ERROR_INFO(do_trap_store_fault,
 	SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault");
-DO_ERROR_INFO(do_trap_ecall_u,
-	SIGILL, ILL_ILLTRP, "environment call from U-mode");
 DO_ERROR_INFO(do_trap_ecall_s,
 	SIGILL, ILL_ILLTRP, "environment call from S-mode");
 DO_ERROR_INFO(do_trap_ecall_m,
@@ -183,7 +217,7 @@  static inline unsigned long get_break_insn_length(unsigned long pc)
 	return GET_INSN_LENGTH(insn);
 }
 
-asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
+void handle_break(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
 	if (kprobe_single_step_handler(regs))
@@ -213,7 +247,77 @@  asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
 	else
 		die(regs, "Kernel BUG");
 }
-NOKPROBE_SYMBOL(do_trap_break);
+
+asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {
+		irqentry_enter_from_user_mode(regs);
+
+		handle_break(regs);
+
+		irqentry_exit_to_user_mode(regs);
+	} else {
+		irqentry_state_t state = irqentry_nmi_enter(regs);
+
+		handle_break(regs);
+
+		irqentry_nmi_exit(regs, state);
+	}
+}
+
+asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {
+		ulong syscall = regs->a7;
+
+		syscall = syscall_enter_from_user_mode(regs, syscall);
+
+		regs->epc += 4;
+		regs->orig_a0 = regs->a0;
+
+		if (syscall < NR_syscalls)
+			syscall_handler(regs, syscall);
+		else
+			regs->a0 = -ENOSYS;
+
+		syscall_exit_to_user_mode(regs);
+	} else {
+		irqentry_state_t state = irqentry_nmi_enter(regs);
+
+		do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc,
+			"Oops - environment call from U-mode");
+
+		irqentry_nmi_exit(regs, state);
+	}
+
+}
+
+#ifdef CONFIG_MMU
+asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+
+	handle_page_fault(regs);
+
+	local_irq_disable();
+
+	irqentry_exit(regs, state);
+}
+#endif
+
+asmlinkage __visible noinstr void do_irq(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+	irqentry_state_t state = irqentry_enter(regs);
+
+	irq_enter_rcu();
+	old_regs = set_irq_regs(regs);
+	handle_arch_irq(regs);
+	set_irq_regs(old_regs);
+	irq_exit_rcu();
+
+	irqentry_exit(regs, state);
+}
 
 #ifdef CONFIG_GENERIC_BUG
 int is_valid_bugaddr(unsigned long pc)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index eb0774d9c03b..efc36be38bce 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -15,6 +15,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/kprobes.h>
 #include <linux/kfence.h>
+#include <linux/entry-common.h>
 
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
@@ -204,7 +205,7 @@  static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
  * This routine handles page faults.  It determines the address and the
  * problem, and then passes it off to one of the appropriate routines.
  */
-asmlinkage void do_page_fault(struct pt_regs *regs)
+void handle_page_fault(struct pt_regs *regs)
 {
 	struct task_struct *tsk;
 	struct vm_area_struct *vma;
@@ -251,7 +252,7 @@  asmlinkage void do_page_fault(struct pt_regs *regs)
 	}
 #endif
 	/* Enable interrupts if they were enabled in the parent context. */
-	if (likely(regs->status & SR_PIE))
+	if (!regs_irqs_disabled(regs))
 		local_irq_enable();
 
 	/*
@@ -353,4 +354,3 @@  asmlinkage void do_page_fault(struct pt_regs *regs)
 	}
 	return;
 }
-NOKPROBE_SYMBOL(do_page_fault);