Message ID | 20250212-nolibc-mips-n32-v1-1-6892e58d1321@weissschuh.net (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | tools/nolibc: add support for N64 and N32 ABIs | expand |
On Wed, Feb 12, 2025 at 07:49:53PM +0100, Thomas Weißschuh wrote: > +#if defined(_ABIO32) > + > #define _NOLIBC_SYSCALL_CLOBBERLIST \ > "memory", "cc", "at", "v1", "hi", "lo", \ > "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" > +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" > +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" > + > +#elif defined(_ABIN32) || defined(_ABI64) > + > +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ Is this often encountered despite this ? I guess it can cause portability issues :-/ > +#if defined(_ABIO32) > + > #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > ({ \ > register long _num __asm__ ("v0") = (num); \ (...) > @@ -178,6 +201,50 @@ > _arg4 ? -_num : _num; \ > }) > > +#else > + Here you should indicate which ABI is covered by this #else, because one has to go up to previous definitions to figure it's _ABIN32 and _ABI64. > +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > +({ \ > + register long _num __asm__ ("v0") = (num); \ > + register long _arg1 __asm__ ("$4") = (long)(arg1); \ > + register long _arg2 __asm__ ("$5") = (long)(arg2); \ > + register long _arg3 __asm__ ("$6") = (long)(arg3); \ > + register long _arg4 __asm__ ("$7") = (long)(arg4); \ > + register long _arg5 __asm__ ("$8") = (long)(arg5); \ (...) > @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ > "1:\n" > ".cpload $ra\n" > "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ > + > +#if defined(_ABIO32) > "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ > ".cprestore 0\n" > "li $t0, -8\n" > "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ > "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ > - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ > +#else So same here. 
I think you should do it for all #else since you're generally grouping 2 ABIs vs one between a #if and a #else and it's not trivial to figure what a #else covers, like below. > + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ > + ".cpsetup $ra, 0, 1b\n" > + "li $t0, -16\n" > + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ > +#endif > + > + /* ABI requires current function address in $t9 */ > +#if defined(_ABIO32) || defined(_ABIN32) > + "lui $t9, %hi(_start_c)\n" > "ori $t9, %lo(_start_c)\n" > +#else This one indeed covers only _ABI64 > + "lui $t9, %highest(_start_c)\n" > + "ori $t9, %higher(_start_c)\n" > + "dsll $t9, 0x10\n" > + "ori $t9, %hi(_start_c)\n" > + "dsll $t9, 0x10\n" > + "ori $t9, %lo(_start_c)\n" > +#endif With the tiny details above, this looks fine. It's great that syscall numbers didn't change so that you can cover an extra arch with only a few ifdefs. I have not tested but I guess you did :-) So that's OK for me: Acked-by: Willy Tarreau <w@1wt.eu> Thanks! Willy
On Wed, 12 Feb 2025, Thomas Weißschuh wrote: > diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h > index 753a8ed2cf695f0b5eac4b5e4d317fdb383ebf93..638520a3427a985fdbd5f5a49b55853bbadeee75 100644 > --- a/tools/include/nolibc/arch-mips.h > +++ b/tools/include/nolibc/arch-mips.h > @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ > "1:\n" > ".cpload $ra\n" > "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ > + > +#if defined(_ABIO32) > "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ > ".cprestore 0\n" > "li $t0, -8\n" > "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ > "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ > - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ > +#else > + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ > + ".cpsetup $ra, 0, 1b\n" > + "li $t0, -16\n" > + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ > +#endif Why is this code breaking stack alignment just to have to fix it up two instructions down the line? Or is it that the incoming $sp is not aligned in the first place (in which case we're having a deeper problem). > + > + /* ABI requires current function address in $t9 */ > +#if defined(_ABIO32) || defined(_ABIN32) > + "lui $t9, %hi(_start_c)\n" > "ori $t9, %lo(_start_c)\n" > +#else > + "lui $t9, %highest(_start_c)\n" > + "ori $t9, %higher(_start_c)\n" > + "dsll $t9, 0x10\n" > + "ori $t9, %hi(_start_c)\n" > + "dsll $t9, 0x10\n" > + "ori $t9, %lo(_start_c)\n" This could be optimised using a temporary (e.g. $at, but I guess any will do as I gather we don't have any ABI abnormalities here). > +#endif > + > "jalr $t9\n" /* transfer to c runtime > */ > " nop\n" /* delayed slot On an unrelated matter JALR above ought to be JAL (or otherwise there's no point in using the .cprestore pseudo-op). 
And I fail to see why this code has to be "noreorder" (except for the .cpload piece, of course), it's just asking for troubles. Maciej
On 2025-02-16 10:49:38+0100, Willy Tarreau wrote: > On Wed, Feb 12, 2025 at 07:49:53PM +0100, Thomas Weißschuh wrote: > > +#if defined(_ABIO32) > > + > > #define _NOLIBC_SYSCALL_CLOBBERLIST \ > > "memory", "cc", "at", "v1", "hi", "lo", \ > > "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" > > +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" > > +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" > > + > > +#elif defined(_ABIN32) || defined(_ABI64) > > + > > +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ > > Is this often encountered despite this ? I guess it can cause portability > issues :-/ No idea. It's the first time I saw something like this. > > +#if defined(_ABIO32) > > + > > #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > > ({ \ > > register long _num __asm__ ("v0") = (num); \ > (...) > > @@ -178,6 +201,50 @@ > > _arg4 ? -_num : _num; \ > > }) > > > > +#else > > + > > Here you should indicate which ABI is covered by this #else, because one > has to go up to previous definitions to figure it's _ABIN32 and _ABI64. Ack. > > +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > > +({ \ > > + register long _num __asm__ ("v0") = (num); \ > > + register long _arg1 __asm__ ("$4") = (long)(arg1); \ > > + register long _arg2 __asm__ ("$5") = (long)(arg2); \ > > + register long _arg3 __asm__ ("$6") = (long)(arg3); \ > > + register long _arg4 __asm__ ("$7") = (long)(arg4); \ > > + register long _arg5 __asm__ ("$8") = (long)(arg5); \ > (...) 
> > @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ > > "1:\n" > > ".cpload $ra\n" > > "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ > > + > > +#if defined(_ABIO32) > > "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ > > ".cprestore 0\n" > > "li $t0, -8\n" > > "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ > > "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ > > - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ > > +#else > > So same here. I think you should do it for all #else since you're generally > grouping 2 ABIs vs one between a #if and a #else and it's not trivial to > figure what a #else covers, like below. Ack. > > + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ > > + ".cpsetup $ra, 0, 1b\n" > > + "li $t0, -16\n" > > + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ > > +#endif > > + > > + /* ABI requires current function address in $t9 */ > > +#if defined(_ABIO32) || defined(_ABIN32) > > + "lui $t9, %hi(_start_c)\n" > > "ori $t9, %lo(_start_c)\n" > > +#else > > This one indeed covers only _ABI64 (That is intentional) > > + "lui $t9, %highest(_start_c)\n" > > + "ori $t9, %higher(_start_c)\n" > > + "dsll $t9, 0x10\n" > > + "ori $t9, %hi(_start_c)\n" > > + "dsll $t9, 0x10\n" > > + "ori $t9, %lo(_start_c)\n" > > +#endif > > With the tiny details above, this looks fine. It's great that syscall > numbers didn't change so that you can cover an extra arch with only a > few ifdefs. I have not tested but I guess you did :-) So that's OK for > me: The syscall numbers are different, but the UAPI headers also detect the ABI in use and select the correct numbers. > Acked-by: Willy Tarreau <w@1wt.eu> Thanks!
Hi Maciej, thanks for your feedback! On 2025-02-16 15:41:55+0000, Maciej W. Rozycki wrote: > On Wed, 12 Feb 2025, Thomas Weißschuh wrote: > > diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h > > index 753a8ed2cf695f0b5eac4b5e4d317fdb383ebf93..638520a3427a985fdbd5f5a49b55853bbadeee75 100644 > > --- a/tools/include/nolibc/arch-mips.h > > +++ b/tools/include/nolibc/arch-mips.h > > @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ > > "1:\n" > > ".cpload $ra\n" > > "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ > > + > > +#if defined(_ABIO32) > > "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ > > ".cprestore 0\n" > > "li $t0, -8\n" > > "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ > > "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ > > - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ > > +#else > > + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ > > + ".cpsetup $ra, 0, 1b\n" > > + "li $t0, -16\n" > > + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ > > +#endif > > Why is this code breaking stack alignment just to have to fix it up two > instructions down the line? Or is it that the incoming $sp is not aligned > in the first place (in which case we're having a deeper problem). nolibc itself does not assume that $sp is aligned. Maybe Willy can explain the historical background. The System V ABI MIPS supplement [0] says the following: The registers listed below have the specified contents at process entry: ... $sp The stack pointer holds the address of the bottom of the stack, which must be doubleword (8 byte) aligned. ... However "process entry" is main(), while this code is running before main. The kernel always aligns the stack to a multiple of 16 bytes. See the usage of STACK_ROUND() in fs/binfmt_elf.c. So I guess we could remove the manual alignment. 
(At least for alignments of 16 bytes and less) > > + > > + /* ABI requires current function address in $t9 */ > > +#if defined(_ABIO32) || defined(_ABIN32) > > + "lui $t9, %hi(_start_c)\n" > > "ori $t9, %lo(_start_c)\n" > > +#else > > + "lui $t9, %highest(_start_c)\n" > > + "ori $t9, %higher(_start_c)\n" > > + "dsll $t9, 0x10\n" > > + "ori $t9, %hi(_start_c)\n" > > + "dsll $t9, 0x10\n" > > + "ori $t9, %lo(_start_c)\n" > > This could be optimised using a temporary (e.g. $at, but I guess any will > do as I gather we don't have any ABI abnormalities here). clang rejects manual usage of $at without ".set noat". So $t0 is simpler. > > +#endif > > + > > "jalr $t9\n" /* transfer to c runtime > > */ > > " nop\n" /* delayed slot > > On an unrelated matter JALR above ought to be JAL (or otherwise there's > no point in using the .cprestore pseudo-op). And I fail to see why this > code has to be "noreorder" (except for the .cpload piece, of course), it's > just asking for troubles. Thanks for the hints. Without "noreorder", is the manual addition of the delayed slot "nop" still necessary? These points also apply to the existing O32 implementation, right? If so I'll make a proper series out of it. [0] https://refspecs.linuxfoundation.org/elf/mipsabi.pdf Thomas
Hi Thomas, > thanks for your feedback! You're welcome. Sadly little MIPS-fu seems still available nowadays and for me it's often too easy to miss MIPS-related topics in the mailing list flood. > > Why is this code breaking stack alignment just to have to fix it up two > > instructions down the line? Or is it that the incoming $sp is not aligned > > in the first place (in which case we're having a deeper problem). > > nolibc itself does not assume that $sp is aligned. > Maybe Willy can explain the historical background. I'm all ears. > The System V ABI MIPS supplement [0] says the following: > > The registers listed below have the specified contents at process entry: > ... > > $sp The stack pointer holds the address of the bottom of the stack, which > must be doubleword (8 byte) aligned. > ... > > However "process entry" is main(), while this code is running before main. Umm, no, process entry definitely is not main(); if you refer to the somewhat obsolete and inaccurate MIPS psABI, then please note that the paragraph right above your quote says: $2 A non-zero value specifies a function pointer the application should register with atexit(BA_OS). If $2 contains zero, no action is required. and one immediately below it says: $31 The return address register is set to zero so that programs that search backward through stack frames (stack backtracing) recognize the last stack frame, that is, a stack frame with a zero in the saved $31 slot. and there is more on the initial process stack earlier on, including the location of the auxiliary vector. All it making it clear it's not main() this specification refers to, but the entry point from the OS kernel (then of course you're aware already it's o32 it talks about; n64/n32 requires 16 bytes, but then again we only have secondary references[1][2], in this case for SGI IRIX). > The kernel always aligns the stack to a multiple of 16 bytes. > See the usage of STACK_ROUND() in fs/binfmt_elf.c. 
> > So I guess we could remove the manual alignment. > (At least for alignments of 16 bytes and less) I think they all need to go then, but then stack pointer adjustments have to be made in multiples of the alignment required by the psABI of course. > > > + > > > + /* ABI requires current function address in $t9 */ > > > +#if defined(_ABIO32) || defined(_ABIN32) > > > + "lui $t9, %hi(_start_c)\n" > > > "ori $t9, %lo(_start_c)\n" > > > +#else > > > + "lui $t9, %highest(_start_c)\n" > > > + "ori $t9, %higher(_start_c)\n" > > > + "dsll $t9, 0x10\n" > > > + "ori $t9, %hi(_start_c)\n" > > > + "dsll $t9, 0x10\n" > > > + "ori $t9, %lo(_start_c)\n" > > > > This could be optimised using a temporary (e.g. $at, but I guess any will > > do as I gather we don't have any ABI abnormalities here). > > clang rejects manual usage of $at without ".set noat". > So $t0 is simpler. It's always `.set at' that's required to use $at by hand; it's been so long before clang was even thought of. Or you could use LA and DLA macros for o32/n32 and n64 respectively for the assembler to do all this stuff for you in the default `.set reorder' mode (assuming that clang is not completely broken here). > > > +#endif > > > + > > > "jalr $t9\n" /* transfer to c runtime > > > */ > > > " nop\n" /* delayed slot > > > > On an unrelated matter JALR above ought to be JAL (or otherwise there's > > no point in using the .cprestore pseudo-op). And I fail to see why this > > code has to be "noreorder" (except for the .cpload piece, of course), it's > > just asking for troubles. > > Thanks for the hints. > > Without "noreorder", is the manually addition of the delayed slot "nop" > still necessary? It's not. It's for the `.set noreorder' mode only, to fill the branch delay slot by hand. Otherwise it'll become just a useless instruction following the call sequence and executed after return. > These points also apply to the existing O32 implementation, right? Correct. Sadly it's the first time I see this code. 
Overall I find it a bit of a chimera: it uses `.set noreorder' and explicit relocations on one hand, and then high-level assembly `.cpload' and `.cprestore' pseudo-ops on the other, effectively mixing the two styles of assembly. The pseudo-ops come from times when using assembly macros was the norm and are there to support that coding model where macros rely on these pseudo-ops, and before the use of explicit relocations became the norm at least for GCC. In the absence of assembly macros you can write code expansions for these pseudo-ops by hand, just as what GCC does nowadays (in the `-mexplicit-relocs' mode, which is usually the default). But due to architecture variations it's very hard to write handcoded assembly in the `.set noreorder' mode: you need to take care of all the data dependencies that appear due to the lack of interlocking between pipeline stages, which results in code that either works correctly everywhere, but is suboptimal for newer architecture revisions, or code that works better with newer architecture revisions, but is actually broken with earlier ones. About the only typical case where you do want to use `.set noreorder' is to schedule a branch delay slot by hand due to a data anti-dependency between the two instructions. Patchable/self-modifying code is a less frequent case. And I had literally one case in my entire career where I wanted to actually jump to a branch delay slot instruction (it's still there in arch/mips/include/asm/div64.h, in do_div64_32(); see label #2). Also it appears no code in this function actually relies on $gp having been set up, so perhaps this stuff can just be discarded in the first place? References: [1] "MIPSpro 64-Bit Porting and Transition Guide", <https://irix7.com/techpubs/007-2391-006.pdf> [2] "MIPSpro N32 ABI Handbook", <https://irix7.com/techpubs/007-2816-004.pdf> Maciej
Hi Maciej, On Mon, Feb 17, 2025 at 11:23:11PM +0000, Maciej W. Rozycki wrote: > > > Why is this code breaking stack alignment just to have to fix it up two > > > instructions down the line? Or is it that the incoming $sp is not aligned > > > in the first place (in which case we're having a deeper problem). > > > > nolibc itself does not assume that $sp is aligned. > > Maybe Willy can explain the historical background. > > I'm all ears. I had a look, that's interesting. Actually this started in the very early i386 code in nolibc, where we already use the stack in the entry code, and simply fix it up before calling main. Then we pursued this with x86_64 (which also uses the stack and needs to fix it up), then arm (where we use and fix the stack), then the MIPS entry code was simply written based on the same construct while it does not use the stack thus does indeed not need to be fixed. Here are the links, it predates kernel inclusion: arm: https://github.com/wtarreau/nolibc/commit/af968b1 mips: https://github.com/wtarreau/nolibc/commit/e04cd25 Thus we can shave 4 more bytes from the MIPS entry code ;-) > > > > +#endif > > > > + > > > > "jalr $t9\n" /* transfer to c runtime > > > > */ > > > > " nop\n" /* delayed slot > > > > > > On an unrelated matter JALR above ought to be JAL (or otherwise there's > > > no point in using the .cprestore pseudo-op). And I fail to see why this > > > code has to be "noreorder" (except for the .cpload piece, of course), it's > > > just asking for troubles. > > > > Thanks for the hints. > > > > Without "noreorder", is the manual addition of the delayed slot "nop" > > still necessary? > > It's not. It's for the `.set noreorder' mode only, to fill the branch > delay slot by hand. Otherwise it'll become just a useless instruction > following the call sequence and executed after return. Similarly I had ".set noreorder" and the nop in the initial code from 2017. 
I remember that it took me a long while to get that init code to work on my MIPS boxes, and it's extremely likely that I found the noreorder trick long after I placed the nop and did not remove it. > > These points also apply to the existing O32 implementation, right? > > Correct. Sadly it's the first time I see this code. > > Overall I find it a bit of a chimera: it uses `.set noreorder' and > explicit relocations on one hand, and then high-level assembly `.cpload' > and `.cprestore' pseudo-ops on the other, effectively mixing the two > styles of assembly. > > The pseudo-ops come from times when using assembly macros was the norm > and are there to support that coding model where macros rely on these > pseudo-ops, and before the use of explicit relocations became the norm at > least for GCC. In the absence of assembly macros you can write code > expansions for these pseudo-ops by hand, just as what GCC does nowadays > (in the `-mexplicit-relocs' mode, which is usually the default). > > But due to architecture variations it's very hard to write handcoded > assembly in the `.set noreorder' mode: you need to take care of all the > data dependencies that appear due to the lack of interlocking between > pipeline stages, which results in code that either works correctly > everywhere, but is suboptimal for newer architecture revisions, or code > that works better with newer architecture revisions, but is actually > broken with earlier ones. > > About the only typical case where you do want to use `.set noreorder' is > to schedule a branch delay slot by hand due to a data anti-dependency > between the two instructions. Patchable/self-modifying code is a less > frequent case. And I had literally one case in my entire career where I > wanted to actually jump to a branch delay slot instruction (it's still > there in arch/mips/include/asm/div64.h, in do_div64_32(); see label #2). 
> > Also it appears no code in this function actually relies on $gp having > been set up, so perhaps this stuff can just be discarded in the first > place? All of this is very helpful. For example we did face an issue with noreorder here, that required to then add ".set push" then ".set pop" as it was polluting the rest of the code: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=184177c3d6e0 But I'm pretty sure that I didn't invent this ".set noreorder" and if I had found how to get rid of it, I'd happily done it. Like often with such code, it was produced by trial and error, and it's very possible that one solution was added to cover the problem caused by another one, and was not optimal. For me by then, the code was considered OK enough when my preinit program would work fine on all my machines (mostly mips-24Kc and mips-1004Kc that I daily have access to). In any case that's something that's easy to try again if we want to clean that up to normalize it. Thanks! Willy
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 753a8ed2cf695f0b5eac4b5e4d317fdb383ebf93..638520a3427a985fdbd5f5a49b55853bbadeee75 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#elif defined(_ABIN32) || defined(_ABI64) + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. 
*/ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? 
-_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,11 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,6 +201,50 @@ _arg4 ? -_num : _num; \ }) +#else + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? 
-_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#endif + /* startup code, note that it's called __start on MIPS */ void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ "1:\n" ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ + +#if defined(_ABIO32) "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ ".cprestore 0\n" "li $t0, -8\n" "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ +#else + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ + ".cpsetup $ra, 0, 1b\n" + "li $t0, -16\n" + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ +#endif + + /* ABI requires current function address in $t9 */ +#if defined(_ABIO32) || defined(_ABIN32) + "lui $t9, %hi(_start_c)\n" "ori $t9, %lo(_start_c)\n" +#else + "lui $t9, %highest(_start_c)\n" + "ori $t9, %higher(_start_c)\n" + "dsll $t9, 0x10\n" + "ori $t9, %hi(_start_c)\n" + "dsll $t9, 0x10\n" + "ori $t9, %lo(_start_c)\n" +#endif + "jalr $t9\n" /* transfer to c runtime */ " nop\n" /* delayed slot 
*/ ".set pop\n" diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 983985b7529b65b7ce4a00c28f3f915d83974eea..2dec6ab9596c974b6aac439685e17f5c10a76948 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -52,6 +52,10 @@ ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc ARCH_mips32le = mips ARCH_mips32be = mips +ARCH_mipsn32le = mips +ARCH_mipsn32be = mips +ARCH_mips64le = mips +ARCH_mips64be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) @@ -64,6 +68,10 @@ IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage IMAGE_mips32le = vmlinuz IMAGE_mips32be = vmlinuz +IMAGE_mipsn32le = vmlinuz +IMAGE_mipsn32be = vmlinuz +IMAGE_mips64le = vmlinuz +IMAGE_mips64be = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage @@ -83,6 +91,10 @@ DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_mips32le = malta_defconfig DEFCONFIG_mips32be = malta_defconfig generic/eb.config +DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config +DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config +DEFCONFIG_mips64le = malta_defconfig generic/64r6.config +DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -105,7 +117,11 @@ QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm QEMU_ARCH_mips32le = mipsel # works with malta_defconfig -QEMU_ARCH_mips32be = mips +QEMU_ARCH_mips32be = mips +QEMU_ARCH_mipsn32le = mips64el +QEMU_ARCH_mipsn32be = mips64 +QEMU_ARCH_mips64le = mips64el +QEMU_ARCH_mips64be = mips64 QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 @@ -117,6 +133,8 @@ QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER_mipsn32le = 
mipsn32el +QEMU_ARCH_USER_mipsn32be = mipsn32 QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) QEMU_BIOS_DIR = /usr/share/edk2/ @@ -134,6 +152,10 @@ QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -161,6 +183,10 @@ CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2 +CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6 +CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6 +CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 6db01115276888bc89f6ec5532153c37e55c83d3..f0f3890fb5fa8196cd33aa8681ed30b00d8f474e 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ 
b/tools/testing/selftests/nolibc/run-tests.sh @@ -20,7 +20,7 @@ llvm= all_archs=( i386 x86_64 arm64 arm - mips32le mips32be + mips32le mips32be mipsn32le mipsn32be mips64le mips64be ppc ppc64 ppc64le riscv32 riscv64 s390
Add support for the MIPS 64bit N64 and ILP32 N32 ABIs. In addition to different byte orders and ABIs there are also different releases of the MIPS architecture. To avoid blowing up the test matrix, only add a subset of all possible test combinations. Signed-off-by: Thomas Weißschuh <linux@weissschuh.net> --- tools/include/nolibc/arch-mips.h | 119 ++++++++++++++++++++++++---- tools/testing/selftests/nolibc/Makefile | 28 ++++++- tools/testing/selftests/nolibc/run-tests.sh | 2 +- 3 files changed, 131 insertions(+), 18 deletions(-) --- base-commit: 16681bea9a80080765c98b545ad74c17de2d513c change-id: 20231105-nolibc-mips-n32-234901bd910d Best regards,