diff mbox series

[RFC] tools/nolibc: add support for constructors and destructors

Message ID 20231005-nolibc-constructors-v1-1-776d56bbe917@weissschuh.net (mailing list archive)
State New
Headers show
Series [RFC] tools/nolibc: add support for constructors and destructors | expand

Commit Message

Thomas Weißschuh Oct. 5, 2023, 4:45 p.m. UTC
With the startup code moved to C, implementing support for
constructors and deconstructors is fairly easy to implement.

Examples for code size impact:

   text	   data	    bss	    dec	    hex	filename
  21837	    104	     88	  22029	   560d	nolibc-test.before
  22135	    120	     88	  22343	   5747	nolibc-test.after
  21970	    104	     88	  22162	   5692 nolibc-test.after-only-crt.h-changes

The sections are defined by [0].

[0] https://refspecs.linuxfoundation.org/elf/gabi4+/ch5.dynamic.html

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
---
Note:

This is only an RFC as I'm not 100% sure it belong into nolibc.
But at least the code is visible as an example.

Also it is one prerequisite for full ksefltest_harness.h support in
nolibc, should we want that.
---
 tools/include/nolibc/crt.h                   | 23 ++++++++++++++++++++++-
 tools/testing/selftests/nolibc/nolibc-test.c | 16 ++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)


---
base-commit: ab663cc32912914258bc8a2fbd0e753f552ee9d8
change-id: 20231005-nolibc-constructors-b2aebffe9b65

Best regards,

Comments

Willy Tarreau Oct. 7, 2023, 6:50 a.m. UTC | #1
Hi Thomas,

On Thu, Oct 05, 2023 at 06:45:07PM +0200, Thomas Weißschuh wrote:
> With the startup code moved to C, implementing support for
> constructors and deconstructors is fairly easy to implement.
> 
> Examples for code size impact:
> 
>    text	   data	    bss	    dec	    hex	filename
>   21837	    104	     88	  22029	   560d	nolibc-test.before
>   22135	    120	     88	  22343	   5747	nolibc-test.after
>   21970	    104	     88	  22162	   5692 nolibc-test.after-only-crt.h-changes
> 
> The sections are defined by [0].
> 
> [0] https://refspecs.linuxfoundation.org/elf/gabi4+/ch5.dynamic.html
> 
> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> ---
> Note:
> 
> This is only an RFC as I'm not 100% sure it belong into nolibc.
> But at least the code is visible as an example.

That's interesting, thanks for working on this! I thought about it in
the past but didn't see how to address it. I do think some users might
find it convenient with modular code that will require less ifdefs.
That may be particularly true with test programs that want to register
some test series for example. The code looks clean to me, and I suppose
you've tested it on multiple archs. However I'm having a comment below:
(...)

>  #endif /* _NOLIBC_CRT_H */
> diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
> index a3ee4496bf0a..f166b425613a 100644
> --- a/tools/testing/selftests/nolibc/nolibc-test.c
> +++ b/tools/testing/selftests/nolibc/nolibc-test.c
> @@ -57,6 +57,9 @@ static int test_argc;
>  /* will be used by some test cases as readable file, please don't write it */
>  static const char *argv0;
>  
> +/* will be used by constructor tests */
> +static int constructor_test_value;
> +
>  /* definition of a series of tests */
>  struct test {
>  	const char *name;              /* test name */
> @@ -594,6 +597,18 @@ int expect_strne(const char *expr, int llen, const char *cmp)
>  #define CASE_TEST(name) \
>  	case __LINE__: llen += printf("%d %s", test, #name);
>  
> +__attribute__((constructor))
> +static void constructor1(void)
> +{
> +	constructor_test_value = 1;
> +}
> +
> +__attribute__((constructor))
> +static void constructor2(void)
> +{
> +	constructor_test_value *= 2;
> +}
> +

In the past I learned the hard way that you can never trust the execution
order of constructors, so if you're unlucky above you could very well end
up with 1 and that would be correct. I suggest that instead you do something
such as:

      constructor_test_value += 1;
...
      constructor_test_value += 2;

and check for value 3 in the test to make sure they were both executed
exactly once each.

Thanks!
Willy
Thomas Weißschuh Oct. 7, 2023, 7:28 a.m. UTC | #2
Hi Willy,

On 2023-10-07 08:50:25+0200, Willy Tarreau wrote:
> On Thu, Oct 05, 2023 at 06:45:07PM +0200, Thomas Weißschuh wrote:
> > With the startup code moved to C, implementing support for
> > constructors and deconstructors is fairly easy to implement.

> [..]

> > diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
> > index a3ee4496bf0a..f166b425613a 100644
> > --- a/tools/testing/selftests/nolibc/nolibc-test.c
> > +++ b/tools/testing/selftests/nolibc/nolibc-test.c
> > @@ -57,6 +57,9 @@ static int test_argc;
> >  /* will be used by some test cases as readable file, please don't write it */
> >  static const char *argv0;
> >  
> > +/* will be used by constructor tests */
> > +static int constructor_test_value;
> > +
> >  /* definition of a series of tests */
> >  struct test {
> >  	const char *name;              /* test name */
> > @@ -594,6 +597,18 @@ int expect_strne(const char *expr, int llen, const char *cmp)
> >  #define CASE_TEST(name) \
> >  	case __LINE__: llen += printf("%d %s", test, #name);
> >  
> > +__attribute__((constructor))
> > +static void constructor1(void)
> > +{
> > +	constructor_test_value = 1;
> > +}
> > +
> > +__attribute__((constructor))
> > +static void constructor2(void)
> > +{
> > +	constructor_test_value *= 2;
> > +}
> > +
> 
> In the past I learned the hard way that you can never trust the execution
> order of constructors, so if you're unlucky above you could very well end
> up with 1 and that would be correct. I suggest that instead you do something
> such as:
> 
>       constructor_test_value += 1;
> ...
>       constructor_test_value += 2;
> 
> and check for value 3 in the test to make sure they were both executed
> exactly once each.

Was this indeterminism for constructors from the same translation unit?
Or across different translation units/shared objects?


I'm not entirely sure, but the GCC [0] docs could be read that within a
given TU the execution order for constructors is the same as the
definition order, even for C.

    The priorities for constructor and destructor functions are the same
    as those specified for namespace-scope C++ objects

And linked from there:

    In Standard C++, objects defined at namespace scope are guaranteed
    to be initialized in an order in strict accordance with that of
    their definitions *in a given translation unit*. No guarantee is made
    for initializations across translation units.


[0] https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Function-Attributes.html
(using an old version of the docs to make sure this didn't change recently)
Willy Tarreau Oct. 7, 2023, 8:30 a.m. UTC | #3
On Sat, Oct 07, 2023 at 09:28:45AM +0200, Thomas Weißschuh wrote:
> > In the past I learned the hard way that you can never trust the execution
> > order of constructors, so if you're unlucky above you could very well end
> > up with 1 and that would be correct. I suggest that instead you do something
> > such as:
> > 
> >       constructor_test_value += 1;
> > ...
> >       constructor_test_value += 2;
> > 
> > and check for value 3 in the test to make sure they were both executed
> > exactly once each.
> 
> Was this indeterminism for constructors from the same translation unit?
> Or across different translation units/shared objects?

I'm certain that there's no guarantee from multiple units, but I seem
to remember something about possible reordering within a same unit.

> I'm not entirely sure, but the GCC [0] docs could be read that within a
> given TU the execution order for constructors is the same as the
> definition order, even for C.
> 
>     The priorities for constructor and destructor functions are the same
>     as those specified for namespace-scope C++ objects
> 
> And linked from there:
> 
>     In Standard C++, objects defined at namespace scope are guaranteed
>     to be initialized in an order in strict accordance with that of
>     their definitions *in a given translation unit*. No guarantee is made
>     for initializations across translation units.

Then that's probably OK. I'd say it all depends what you want to test.
If you also want the test to cover for this, then your test is stricter,
but then maybe you should put a comment there saying that it's on purpose
to also verify they execute in the expected order ;-)

Willy
Thomas Weißschuh Oct. 7, 2023, 8:42 a.m. UTC | #4
On 2023-10-07 10:30:35+0200, Willy Tarreau wrote:
> On Sat, Oct 07, 2023 at 09:28:45AM +0200, Thomas Weißschuh wrote:
> > > In the past I learned the hard way that you can never trust the execution
> > > order of constructors, so if you're unlucky above you could very well end
> > > up with 1 and that would be correct. I suggest that instead you do something
> > > such as:
> > > 
> > >       constructor_test_value += 1;
> > > ...
> > >       constructor_test_value += 2;
> > > 
> > > and check for value 3 in the test to make sure they were both executed
> > > exactly once each.
> > 
> > Was this indeterminism for constructors from the same translation unit?
> > Or across different translation units/shared objects?
> 
> I'm certain that there's no guarantee from multiple units, but I seem
> to remember something about possible reordering within a same unit.
> 
> > I'm not entirely sure, but the GCC [0] docs could be read that within a
> > given TU the execution order for constructors is the same as the
> > definition order, even for C.
> > 
> >     The priorities for constructor and destructor functions are the same
> >     as those specified for namespace-scope C++ objects
> > 
> > And linked from there:
> > 
> >     In Standard C++, objects defined at namespace scope are guaranteed
> >     to be initialized in an order in strict accordance with that of
> >     their definitions *in a given translation unit*. No guarantee is made
> >     for initializations across translation units.
> 
> Then that's probably OK. I'd say it all depends what you want to test.
> If you also want the test to cover for this, then your test is stricter,
> but then maybe you should put a comment there saying that it's on purpose
> to also verify they execute in the expected order ;-)

Given that the worst failure mode that could happen here is that the
tests fail and directly point at the issue, it should not be too risky to
try to rely on this behaviour.

Verifying the order was indeed the goal. I'll add the comment.

Thanks,
Thomas
diff mbox series

Patch

diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index a5f33fef1672..c1176611d9a9 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -13,11 +13,22 @@  const unsigned long *_auxv __attribute__((weak));
 static void __stack_chk_init(void);
 static void exit(int);
 
+extern void (*const __preinit_array_start[])(void) __attribute__((weak));
+extern void (*const __preinit_array_end[])(void) __attribute__((weak));
+
+extern void (*const __init_array_start[])(void) __attribute__((weak));
+extern void (*const __init_array_end[])(void) __attribute__((weak));
+
+extern void (*const __fini_array_start[])(void) __attribute__((weak));
+extern void (*const __fini_array_end[])(void) __attribute__((weak));
+
 void _start_c(long *sp)
 {
 	long argc;
 	char **argv;
 	char **envp;
+	int exitcode;
+	void (* const *func)(void);
 	const unsigned long *auxv;
 	/* silence potential warning: conflicting types for 'main' */
 	int _nolibc_main(int, char **, char **) __asm__ ("main");
@@ -54,8 +65,18 @@  void _start_c(long *sp)
 		;
 	_auxv = auxv;
 
+	for (func = __preinit_array_start; func < __preinit_array_end; func++)
+		(*func)();
+	for (func = __init_array_start; func < __init_array_end; func++)
+		(*func)();
+
 	/* go to application */
-	exit(_nolibc_main(argc, argv, envp));
+	exitcode = _nolibc_main(argc, argv, envp);
+
+	for (func = __fini_array_end - 1; func >= __fini_array_start; func--)
+		(*func)();
+
+	exit(exitcode);
 }
 
 #endif /* _NOLIBC_CRT_H */
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index a3ee4496bf0a..f166b425613a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -57,6 +57,9 @@  static int test_argc;
 /* will be used by some test cases as readable file, please don't write it */
 static const char *argv0;
 
+/* will be used by constructor tests */
+static int constructor_test_value;
+
 /* definition of a series of tests */
 struct test {
 	const char *name;              /* test name */
@@ -594,6 +597,18 @@  int expect_strne(const char *expr, int llen, const char *cmp)
 #define CASE_TEST(name) \
 	case __LINE__: llen += printf("%d %s", test, #name);
 
+__attribute__((constructor))
+static void constructor1(void)
+{
+	constructor_test_value = 1;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+	constructor_test_value *= 2;
+}
+
 int run_startup(int min, int max)
 {
 	int test;
@@ -631,6 +646,7 @@  int run_startup(int min, int max)
 		CASE_TEST(auxv_addr);        EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
 		CASE_TEST(auxv_AT_UID);      EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
 		CASE_TEST(auxv_AT_PAGESZ);   EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break;
+		CASE_TEST(constructor);      EXPECT_EQ(1, constructor_test_value, 2); break;
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */