diff mbox series

[v2,2/6] unit-tests: add for_test

Message ID 2dff757d-3c5d-4923-97df-26bcb1c21230@web.de (mailing list archive)
State New
Headers show
Series unit-tests: add and use for_test to simplify tests | expand

Commit Message

René Scharfe July 21, 2024, 6:21 a.m. UTC
The macro TEST only allows defining a test that consists of a single
expression.  Add a new macro, for_test, which provides a way to define
unit tests that are made up of one or more statements.

for_test allows defining self-contained tests en bloc, a bit like
test_expect_success does for regular tests.  It acts like a for loop
that runs at most once; the test body is executed if test_skip_all()
had not been called before.

Helped-by: Phillip Wood <phillip.wood123@gmail.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
---
 .clang-format               |  2 ++
 t/helper/test-example-tap.c | 33 +++++++++++++++++++++++++++++++++
 t/t0080/expect              | 35 ++++++++++++++++++++++++++++++++++-
 t/unit-tests/test-lib.h     | 19 +++++++++++++++++++
 4 files changed, 88 insertions(+), 1 deletion(-)

--
2.45.2

Comments

Kyle Lippincott July 22, 2024, 7:13 p.m. UTC | #1
On Sat, Jul 20, 2024 at 11:22 PM René Scharfe <l.s.r@web.de> wrote:
>
> The macro TEST only allows defining a test that consists of a single
> expression.  Add a new macro, for_test, which provides a way to define
> unit tests that are made up of one or more statements.
>
> for_test allows defining self-contained tests en bloc, a bit like
> test_expect_success does for regular tests.  It acts like a for loop
> that runs at most once; the test body is executed if test_skip_all()
> had not been called before.

I can see based on this description where the name came from, but
without this context, it's not clear when reading a test what it
actually does. The name comes from an implementation detail, and is
not describing what it _does_, just _how_ it does it.

Maybe a name like `small_test` or `quick_test` or something like that
would better express the intended usage?

>
> Helped-by: Phillip Wood <phillip.wood123@gmail.com>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  .clang-format               |  2 ++
>  t/helper/test-example-tap.c | 33 +++++++++++++++++++++++++++++++++
>  t/t0080/expect              | 35 ++++++++++++++++++++++++++++++++++-
>  t/unit-tests/test-lib.h     | 19 +++++++++++++++++++
>  4 files changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/.clang-format b/.clang-format
> index 6408251577..863dc87dfc 100644
> --- a/.clang-format
> +++ b/.clang-format
> @@ -151,6 +151,7 @@ Cpp11BracedListStyle: false
>  # function calls. Taken from:
>  #   git grep -h '^#define [^[:space:]]*for_\?each[^[:space:]]*(' |
>  #   sed "s/^#define /  - '/; s/(.*$/'/" | sort | uniq
> +# Added for_test from t/unit-tests/test-lib.h manually as a special case.
>  ForEachMacros:
>    - 'for_each_builtin'
>    - 'for_each_string_list_item'
> @@ -168,6 +169,7 @@ ForEachMacros:
>    - 'strintmap_for_each_entry'
>    - 'strmap_for_each_entry'
>    - 'strset_for_each_entry'
> +  - 'for_test'
>
>  # The maximum number of consecutive empty lines to keep.
>  MaxEmptyLinesToKeep: 1
> diff --git a/t/helper/test-example-tap.c b/t/helper/test-example-tap.c
> index d072ad559f..51d5e6e75b 100644
> --- a/t/helper/test-example-tap.c
> +++ b/t/helper/test-example-tap.c
> @@ -92,5 +92,38 @@ int cmd__example_tap(int argc, const char **argv)
>         test_res = TEST(t_empty(), "test with no checks");
>         TEST(check_int(test_res, ==, 0), "test with no checks returns 0");
>
> +       for_test ("for_test passing test")
> +               check_int(1, ==, 1);

I'm concerned that users will write this like:
+       for_test ("for_test passing test");
+               check_int(1, ==, 1);

And the issue won't be caught. Maybe that's fine; the only issue here
is that it's not going to be as descriptive if it fails, right?

> +       for_test ("for_test failing test")
> +               check_int(1, ==, 2);
> +       for_test ("for_test passing TEST_TODO()")
> +               TEST_TODO(check(0));
> +       for_test ("for_test failing TEST_TODO()")
> +               TEST_TODO(check(1));
> +       for_test ("for_test test_skip()") {
> +               check(0);
> +               test_skip("missing prerequisite");
> +               check(1);
> +       }
> +       for_test ("for_test test_skip() inside TEST_TODO()")
> +               TEST_TODO((test_skip("missing prerequisite"), 1));
> +       for_test ("for_test TEST_TODO() after failing check") {
> +               check(0);
> +               TEST_TODO(check(0));
> +       }
> +       for_test ("for_test failing check after TEST_TODO()") {
> +               check(1);
> +               TEST_TODO(check(0));
> +               check(0);
> +       }
> +       for_test ("for_test messages from failing string and char comparison") {
> +               check_str("\thello\\", "there\"\n");
> +               check_str("NULL", NULL);
> +               check_char('a', ==, '\n');
> +               check_char('\\', ==, '\'');
> +       }
> +       for_test ("for_test test with no checks")
> +               ; /* nothing */
> +
>         return test_done();
>  }
> diff --git a/t/t0080/expect b/t/t0080/expect
> index 0cfa0dc6d8..583f41b8c9 100644
> --- a/t/t0080/expect
> +++ b/t/t0080/expect
> @@ -40,4 +40,37 @@ not ok 17 - messages from failing string and char comparison
>  # BUG: test has no checks at t/helper/test-example-tap.c:92
>  not ok 18 - test with no checks
>  ok 19 - test with no checks returns 0
> -1..19
> +ok 20 - for_test passing test
> +# check "1 == 2" failed at t/helper/test-example-tap.c:98
> +#    left: 1
> +#   right: 2
> +not ok 21 - for_test failing test
> +not ok 22 - for_test passing TEST_TODO() # TODO
> +# todo check 'check(1)' succeeded at t/helper/test-example-tap.c:102
> +not ok 23 - for_test failing TEST_TODO()
> +# check "0" failed at t/helper/test-example-tap.c:104
> +# skipping test - missing prerequisite
> +# skipping check '1' at t/helper/test-example-tap.c:106
> +ok 24 - for_test test_skip() # SKIP
> +# skipping test - missing prerequisite
> +ok 25 - for_test test_skip() inside TEST_TODO() # SKIP
> +# check "0" failed at t/helper/test-example-tap.c:111
> +not ok 26 - for_test TEST_TODO() after failing check
> +# check "0" failed at t/helper/test-example-tap.c:117
> +not ok 27 - for_test failing check after TEST_TODO()
> +# check "!strcmp("\thello\\", "there\"\n")" failed at t/helper/test-example-tap.c:120
> +#    left: "\011hello\\"
> +#   right: "there\"\012"
> +# check "!strcmp("NULL", NULL)" failed at t/helper/test-example-tap.c:121
> +#    left: "NULL"
> +#   right: NULL
> +# check "'a' == '\n'" failed at t/helper/test-example-tap.c:122
> +#    left: 'a'
> +#   right: '\012'
> +# check "'\\' == '\''" failed at t/helper/test-example-tap.c:123
> +#    left: '\\'
> +#   right: '\''
> +not ok 28 - for_test messages from failing string and char comparison
> +# BUG: test has no checks at t/helper/test-example-tap.c:125
> +not ok 29 - for_test test with no checks
> +1..29
> diff --git a/t/unit-tests/test-lib.h b/t/unit-tests/test-lib.h
> index 2de6d715d5..12afd47ac9 100644
> --- a/t/unit-tests/test-lib.h
> +++ b/t/unit-tests/test-lib.h
> @@ -14,6 +14,25 @@
>         test__run_end(test__run_begin() ? 0 : (t, 1),   \
>                       TEST_LOCATION(),  __VA_ARGS__)
>
> +/*
> + * Run a test unless test_skip_all() has been called.  Acts like a for
> + * loop that runs at most once, with the test description between the
> + * parentheses and the test body as a statement or block after them.
> + * The description for each test should be unique.  E.g.:
> + *
> + *  for_test ("something else %d %d", arg1, arg2) {
> + *          prepare();
> + *          test_something_else(arg1, arg2);
> + *          cleanup();
> + *  }
> + */
> +#define for_test(...)                                                  \
> +       for (int for_test_running_ = test__run_begin() ?                \
> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
> +            for_test_running_;                                         \
> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
> +               for_test_running_ = 0)

IMHO: this is borderline "too much magic" for my tastes. I think
having multiple test functions is generally easier to understand, and
the overhead isn't really relevant. It's not _as_ compact in the
source file, and requires that we have both the TEST statement and the
function (and forgetting the TEST statement means that we won't invoke
the function). If that is the main issue we're facing here, I wonder
if there's some other way of resolving that (such as unused function
detection via some compiler flags; even if it's not detected on all
platforms, getting at least one of the CI platforms should be
sufficient to prevent the issue [but we should target as many as
possible, so it's caught earlier than "I tried to send it to the
list"])

If others agree that this is a good simplification for the people
reading the test code (and hopefully for the test author), I'm fine
with this going in (with a different name). I'm not trying to veto the
concept.


> +
>  /*
>   * Print a test plan, should be called before any tests. If the number
>   * of tests is not known in advance test_done() will automatically
> --
> 2.45.2
>
Junio C Hamano July 22, 2024, 7:36 p.m. UTC | #2
Kyle Lippincott <spectral@google.com> writes:

> I can see based on this description where the name came from, but
> without this context, it's not clear when reading a test what it
> actually does. The name comes from an implementation detail, and is
> not describing what it _does_, just _how_ it does it.
>
> Maybe a name like `small_test` or `quick_test` or something like that
> would better express the intended usage?

Names that explicitly have C keyword for control structures, e.g.
"if_somecondition_test()", "while_foo_test()" or "for_test()" is
vastly preferrable than these, in order to make it clear that we are
introducing a macro that defines control structure.

>> +       for_test ("for_test passing test")
>> +               check_int(1, ==, 1);
>
> I'm concerned that users will write this like:
> +       for_test ("for_test passing test");
> +               check_int(1, ==, 1);

And that is exactly why we want the macro name to include C keyword 
for control structures.

> And the issue won't be caught.

You are right.  Making an empty body somehow catchable by the
compiler would be a vast improvement.

>> +#define for_test(...)                                                  \
>> +       for (int for_test_running_ = test__run_begin() ?                \
>> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
>> +            for_test_running_;                                         \
>> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
>> +               for_test_running_ = 0)
>
> IMHO: this is borderline "too much magic" for my tastes. I think
> having multiple test functions is generally easier to understand, and
> the overhead isn't really relevant. It's not _as_ compact in the
> source file, and requires that we have both the TEST statement and the
> function (and forgetting the TEST statement means that we won't invoke
> the function). If that is the main issue we're facing here, I wonder
> if there's some other way of resolving that (such as unused function
> detection via some compiler flags; even if it's not detected on all
> platforms, getting at least one of the CI platforms should be
> sufficient to prevent the issue [but we should target as many as
> possible, so it's caught earlier than "I tried to send it to the
> list"])

Interesting.

> If others agree that this is a good simplification for the people
> reading the test code (and hopefully for the test author), I'm fine
> with this going in (with a different name). I'm not trying to veto the
> concept.

OK.  But what you suggested in the previous paragraph has merit.
Are there other things that could be improved in the existing unit
test helpers, that would help those who do not use this new for_test()
thing?  Let's see how the patches to improve them would look like.

Thanks.
René Scharfe July 22, 2024, 8:31 p.m. UTC | #3
Am 22.07.24 um 21:36 schrieb Junio C Hamano:
> Kyle Lippincott <spectral@google.com> writes:
>
>>> +       for_test ("for_test passing test")
>>> +               check_int(1, ==, 1);
>>
>> I'm concerned that users will write this like:
>> +       for_test ("for_test passing test");
>> +               check_int(1, ==, 1);
>
> And that is exactly why we want the macro name to include C keyword
> for control structures.
>
>> And the issue won't be caught.
>
> You are right.  Making an empty body somehow catchable by the
> compiler would be a vast improvement.

That would be nice, but I have no idea how to do that without compiler
changes.  In the meantime the existing runtime checks will catch both
the empty test in the first line and the out-of-test check in the second
one and report them like this:

 # BUG: test has no checks at t/helper/test-example-tap.c:75
 not ok 1 - for_test passing test
 Assertion failed: (ctx.running), function test_assert, file test-lib.c, line 267.

File name and line in the second one are not as helpful as they could
be.  Here's a patch to improve that output.

--- >8 ---
Subject: [PATCH] unit-tests: show location of checks outside of tests

Checks outside of tests are caught at runtime and reported like this:

 Assertion failed: (ctx.running), function test_assert, file test-lib.c, line 267.

The assert() call aborts the unit test and doesn't reveal the location
or even the type of the offending check, as test_assert() is called by
all of them.

Handle it like the opposite case, a test without any checks: Don't
abort, but report the location of the actual check, along with a message
explaining the situation.  The output for example above becomes:

 # BUG: check outside of test at t/helper/test-example-tap.c:75

... and the unit test program continues.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 t/unit-tests/test-lib.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/t/unit-tests/test-lib.c b/t/unit-tests/test-lib.c
index 3c513ce59a..9977c81739 100644
--- a/t/unit-tests/test-lib.c
+++ b/t/unit-tests/test-lib.c
@@ -264,7 +264,11 @@ static void test_todo(void)

 int test_assert(const char *location, const char *check, int ok)
 {
-	assert(ctx.running);
+	if (!ctx.running) {
+		test_msg("BUG: check outside of test at %s",
+			 make_relative(location));
+		return 0;
+	}

 	if (ctx.result == RESULT_SKIP) {
 		test_msg("skipping check '%s' at %s", check,
--
2.45.2
Junio C Hamano July 22, 2024, 8:41 p.m. UTC | #4
René Scharfe <l.s.r@web.de> writes:

>>> And the issue won't be caught.
>>
>> You are right.  Making an empty body somehow catchable by the
>> compiler would be a vast improvement.
>
> That would be nice, but I have no idea how to do that without compiler
> changes.

Me neither.  I was trying to nerd-snipe Kyle into coming up with a
solution ;-)

> In the meantime the existing runtime checks will catch both
> the empty test in the first line and the out-of-test check in the second
> one and report them like this:
>
>  # BUG: test has no checks at t/helper/test-example-tap.c:75
>  not ok 1 - for_test passing test
>  Assertion failed: (ctx.running), function test_assert, file test-lib.c, line 267.

Nice improvement, I would say.

> diff --git a/t/unit-tests/test-lib.c b/t/unit-tests/test-lib.c
> index 3c513ce59a..9977c81739 100644
> --- a/t/unit-tests/test-lib.c
> +++ b/t/unit-tests/test-lib.c
> @@ -264,7 +264,11 @@ static void test_todo(void)
>
>  int test_assert(const char *location, const char *check, int ok)
>  {
> -	assert(ctx.running);
> +	if (!ctx.running) {
> +		test_msg("BUG: check outside of test at %s",
> +			 make_relative(location));
> +		return 0;
> +	}
>
>  	if (ctx.result == RESULT_SKIP) {
>  		test_msg("skipping check '%s' at %s", check,
> --
> 2.45.2
Kyle Lippincott July 22, 2024, 10:41 p.m. UTC | #5
On Mon, Jul 22, 2024 at 12:37 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Kyle Lippincott <spectral@google.com> writes:
>
> > I can see based on this description where the name came from, but
> > without this context, it's not clear when reading a test what it
> > actually does. The name comes from an implementation detail, and is
> > not describing what it _does_, just _how_ it does it.
> >
> > Maybe a name like `small_test` or `quick_test` or something like that
> > would better express the intended usage?
>
> Names that explicitly have C keyword for control structures, e.g.
> "if_somecondition_test()", "while_foo_test()" or "for_test()" is
> vastly preferrable than these, in order to make it clear that we are
> introducing a macro that defines control structure.

Perhaps expression_test or something, then? It's testing an expression
(I think blocks are a type of expression? I never remember which is
'larger': expressions or statements, and that might only be a C++
thing, C might not have the same terminology).

I was going to suggest a lint check that checks to ensure that we
don't have a semicolon immediately after the description, or a lint
check that enforced that even single-statement tests are wrapped in {}
(inverting the style guide requirements), but realistically neither
are actually needed: `test__run_begin` already asserts that a test
isn't currently running. `check_int` and friends already assert that a
test _is_ running. So this is already defended against:

for_test ("test description");
    check_int(1, ==, 1);   /* `assert`s: not in the middle of a test */

What we don't have is defense against a completely empty test, or a
test without any actual conditions:

for_test ("test one");
for_test ("test two") {
    printf("this test doesn't actually _test_ anything\n");
}

Adding that is doable, and improves the first case: the `for_test
("test description");` line fails because the test didn't do anything,
not the `check_int` line.

>
> >> +       for_test ("for_test passing test")
> >> +               check_int(1, ==, 1);
> >
> > I'm concerned that users will write this like:
> > +       for_test ("for_test passing test");
> > +               check_int(1, ==, 1);
>
> And that is exactly why we want the macro name to include C keyword
> for control structures.
>
> > And the issue won't be caught.
>
> You are right.  Making an empty body somehow catchable by the
> compiler would be a vast improvement.
>
> >> +#define for_test(...)                                                  \
> >> +       for (int for_test_running_ = test__run_begin() ?                \
> >> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
> >> +            for_test_running_;                                         \
> >> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
> >> +               for_test_running_ = 0)
> >
> > IMHO: this is borderline "too much magic" for my tastes. I think
> > having multiple test functions is generally easier to understand, and
> > the overhead isn't really relevant. It's not _as_ compact in the
> > source file, and requires that we have both the TEST statement and the
> > function (and forgetting the TEST statement means that we won't invoke
> > the function). If that is the main issue we're facing here, I wonder
> > if there's some other way of resolving that (such as unused function
> > detection via some compiler flags; even if it's not detected on all
> > platforms, getting at least one of the CI platforms should be
> > sufficient to prevent the issue [but we should target as many as
> > possible, so it's caught earlier than "I tried to send it to the
> > list"])
>
> Interesting.
>
> > If others agree that this is a good simplification for the people
> > reading the test code (and hopefully for the test author), I'm fine
> > with this going in (with a different name). I'm not trying to veto the
> > concept.
>
> OK.  But what you suggested in the previous paragraph has merit.
> Are there other things that could be improved in the existing unit
> test helpers, that would help those who do not use this new for_test()
> thing?  Let's see how the patches to improve them would look like.
>
> Thanks.
>
Kyle Lippincott July 22, 2024, 10:47 p.m. UTC | #6
On Mon, Jul 22, 2024 at 1:31 PM René Scharfe <l.s.r@web.de> wrote:
>
> Am 22.07.24 um 21:36 schrieb Junio C Hamano:
> > Kyle Lippincott <spectral@google.com> writes:
> >
> >>> +       for_test ("for_test passing test")
> >>> +               check_int(1, ==, 1);
> >>
> >> I'm concerned that users will write this like:
> >> +       for_test ("for_test passing test");
> >> +               check_int(1, ==, 1);
> >
> > And that is exactly why we want the macro name to include C keyword
> > for control structures.
> >
> >> And the issue won't be caught.
> >
> > You are right.  Making an empty body somehow catchable by the
> > compiler would be a vast improvement.
>
> That would be nice, but I have no idea how to do that without compiler
> changes.  In the meantime the existing runtime checks will catch both
> the empty test in the first line and the out-of-test check in the second
> one and report them like this:
>
>  # BUG: test has no checks at t/helper/test-example-tap.c:75
>  not ok 1 - for_test passing test
>  Assertion failed: (ctx.running), function test_assert, file test-lib.c, line 267.
>
> File name and line in the second one are not as helpful as they could
> be.  Here's a patch to improve that output.
>
> --- >8 ---
> Subject: [PATCH] unit-tests: show location of checks outside of tests
>
> Checks outside of tests are caught at runtime and reported like this:
>
>  Assertion failed: (ctx.running), function test_assert, file test-lib.c, line 267.
>
> The assert() call aborts the unit test and doesn't reveal the location
> or even the type of the offending check, as test_assert() is called by
> all of them.
>
> Handle it like the opposite case, a test without any checks: Don't
> abort, but report the location of the actual check, along with a message
> explaining the situation.  The output for example above becomes:

Oh, I referenced "detect tests without checks" as a possible
improvement in my previous message, and didn't actually check whether
it was there. This statement made me go and check, and you're right,
it is there already. So I think we're well defended against the empty
check case, especially with the improvement here. Thanks!

If we have a good set of protections against misuse, does this mean
we're free to rename it? :) The concern raised for why `for` had to be
in the name was because it was using a control statement (a
at-most-1-execution for loop) to achieve its magic, and the control
statement puts certain restrictions and obligations on how to use it
correctly. If the misuse is detected reliably, we can choose a name
that's more descriptive about what it's doing.

>
>  # BUG: check outside of test at t/helper/test-example-tap.c:75
>
> ... and the unit test program continues.
>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  t/unit-tests/test-lib.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/t/unit-tests/test-lib.c b/t/unit-tests/test-lib.c
> index 3c513ce59a..9977c81739 100644
> --- a/t/unit-tests/test-lib.c
> +++ b/t/unit-tests/test-lib.c
> @@ -264,7 +264,11 @@ static void test_todo(void)
>
>  int test_assert(const char *location, const char *check, int ok)
>  {
> -       assert(ctx.running);
> +       if (!ctx.running) {
> +               test_msg("BUG: check outside of test at %s",
> +                        make_relative(location));
> +               return 0;
> +       }
>
>         if (ctx.result == RESULT_SKIP) {
>                 test_msg("skipping check '%s' at %s", check,
> --
> 2.45.2
Patrick Steinhardt July 23, 2024, 6:36 a.m. UTC | #7
On Mon, Jul 22, 2024 at 12:36:57PM -0700, Junio C Hamano wrote:
> Kyle Lippincott <spectral@google.com> writes:
> >> +#define for_test(...)                                                  \
> >> +       for (int for_test_running_ = test__run_begin() ?                \
> >> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
> >> +            for_test_running_;                                         \
> >> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
> >> +               for_test_running_ = 0)
> >
> > IMHO: this is borderline "too much magic" for my tastes. I think
> > having multiple test functions is generally easier to understand, and
> > the overhead isn't really relevant. It's not _as_ compact in the
> > source file, and requires that we have both the TEST statement and the
> > function (and forgetting the TEST statement means that we won't invoke
> > the function). If that is the main issue we're facing here, I wonder
> > if there's some other way of resolving that (such as unused function
> > detection via some compiler flags; even if it's not detected on all
> > platforms, getting at least one of the CI platforms should be
> > sufficient to prevent the issue [but we should target as many as
> > possible, so it's caught earlier than "I tried to send it to the
> > list"])
> 
> Interesting.
> 
> > If others agree that this is a good simplification for the people
> > reading the test code (and hopefully for the test author), I'm fine
> > with this going in (with a different name). I'm not trying to veto the
> > concept.
> 
> OK.  But what you suggested in the previous paragraph has merit.
> Are there other things that could be improved in the existing unit
> test helpers, that would help those who do not use this new for_test()
> thing?  Let's see how the patches to improve them would look like.

Honestly I'm also not too sure how I feel about these new macros, and
I'm somewhat in the same boat that it starts to feels "magicky".

Taking a step back: what is it that is bugging folks about the current
way of writing one function per test? The boilerplate cannot really be
it, as tests should be self-contained anyway and thus we should have the
same boilerplate regardless of whether we use separate functions or the
new macros. I also doubt that it's the function declaration itself, as
that isn't all that involved. So I guess what people are annoyed with is
that every function needs to be defined, but then also wired up via
`TEST_RUN()`, right? And that I totally get, as it is quite annoying.

So... can we solve this in a different way? I quite liked the system
that we had in libgit2: every function must conform to a specific name
`test_foo_bar`. We then have a Python script that reads all test files,
extracts all files that have the `test_` prefix, and writes those into
an array. Optionally, the `test_foo` test suite can also have a setup
and teardown function that gets called for every test, namely
`test_foo_setup()` and `test_foo_teardown()`.

Altogether, the output would look somewhat like this:


```test.c
static test_foo_setup(void)
{
    ... setup global state ...
}

static test_foo_teardown(void)
{
    ... tear down global state ...
}

static test_foo_one(void)
{
    ... first test ...
}

static test_foo_two(void)
{
    ... second test ...
}
```

```generated.c
static const struct test_func _test_foo_functions[] = {
    {
        name: "foo::one",
        test: test_foo_one,
        setup: test_foo_setup,
        teardown: test_foo_teardown,
    },
    {
        name: "foo::two",
        test: test_foo_two,
        setup: test_foo_setup,
        teardown: test_foo_teardown,
    },
};

int main(int argc, const char *argv[])
{
    ... boilerplate to execute all functions ...
}
```

The setup and teardown functions are optional -- if not defined for a
specific test unit, then we simply won't invoke them.

There is of course some magic involved with how we generate the file.
But I think that would be quite manageable, and ultimately all that the
developer would need to care about is writing a `test_foo_something()`
function. Everything else would be handled by our infra.

I'd be happy to implement something along these lines if folks think
that this is a good direction to go into.

Patrick
René Scharfe July 23, 2024, 7:18 a.m. UTC | #8
Am 23.07.24 um 00:41 schrieb Kyle Lippincott:
> On Mon, Jul 22, 2024 at 12:37 PM Junio C Hamano <gitster@pobox.com> wrote:
>>
>> Kyle Lippincott <spectral@google.com> writes:
>>
>>> I can see based on this description where the name came from, but
>>> without this context, it's not clear when reading a test what it
>>> actually does. The name comes from an implementation detail, and is
>>> not describing what it _does_, just _how_ it does it.
>>>
>>> Maybe a name like `small_test` or `quick_test` or something like that
>>> would better express the intended usage?
>>
>> Names that explicitly have C keyword for control structures, e.g.
>> "if_somecondition_test()", "while_foo_test()" or "for_test()" is
>> vastly preferrable than these, in order to make it clear that we are
>> introducing a macro that defines control structure.
>
> Perhaps expression_test or something, then? It's testing an expression
> (I think blocks are a type of expression? I never remember which is
> 'larger': expressions or statements, and that might only be a C++
> thing, C might not have the same terminology).

If in doubt, feel free to look it up.  We're not taking a test here
(bad pun intended).

https://en.cppreference.com/w/c/language/expressions
https://en.cppreference.com/w/c/language/statements

TEST requires test bodies to be expressions, for_test allows them to
be statements.  Statements can include arbitrary expressions, but not
the other way around.

The "for" in for_test indicates that it's a for-like macro.  This
already implies that it is followed by a statement.  It works for the
existing for_each macros.

René
René Scharfe July 23, 2024, 9:25 a.m. UTC | #9
Am 23.07.24 um 08:36 schrieb Patrick Steinhardt:
> On Mon, Jul 22, 2024 at 12:36:57PM -0700, Junio C Hamano wrote:
>> Kyle Lippincott <spectral@google.com> writes:
>>>> +#define for_test(...)                                                  \
>>>> +       for (int for_test_running_ = test__run_begin() ?                \
>>>> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
>>>> +            for_test_running_;                                         \
>>>> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
>>>> +               for_test_running_ = 0)
>>>
>>> IMHO: this is borderline "too much magic" for my tastes. I think
>>> having multiple test functions is generally easier to understand, and
>>> the overhead isn't really relevant. It's not _as_ compact in the
>>> source file, and requires that we have both the TEST statement and the
>>> function (and forgetting the TEST statement means that we won't invoke
>>> the function). If that is the main issue we're facing here, I wonder
>>> if there's some other way of resolving that (such as unused function
>>> detection via some compiler flags; even if it's not detected on all
>>> platforms, getting at least one of the CI platforms should be
>>> sufficient to prevent the issue [but we should target as many as
>>> possible, so it's caught earlier than "I tried to send it to the
>>> list"])
>>
>> Interesting.
>>
>>> If others agree that this is a good simplification for the people
>>> reading the test code (and hopefully for the test author), I'm fine
>>> with this going in (with a different name). I'm not trying to veto the
>>> concept.
>>
>> OK.  But what you suggested in the previous paragraph has merit.
>> Are there other things that could be improved in the existing unit
>> test helpers, that would help those who do not use this new for_test()
>> thing?  Let's see how the patches to improve them would look like.
>
> Honestly I'm also not too sure how I feel about these new macros, and
> I'm somewhat in the same boat that it starts to feels "magicky".
>
> Taking a step back: what is it that is bugging folks about the current
> way of writing one function per test?

My goal is to be able to define a test without repeating its
description even partly, like test_expect_success allows for shell-based
tests.

> I quite liked the system
> that we had in libgit2: every function must conform to a specific name
> `test_foo_bar`. We then have a Python script that reads all test files,
> extracts all files that have the `test_` prefix, and writes those into
> an array. Optionally, the `test_foo` test suite can also have a setup
> and teardown function that gets called for every test, namely
> `test_foo_setup()` and `test_foo_teardown()`.
>
> Altogether, the output would look somewhat like this:
>
>
> ```test.c
> static test_foo_setup(void)
> {
>     ... setup global state ...
> }
>
> static test_foo_teardown(void)
> {
>     ... tear down global state ...
> }
>
> static test_foo_one(void)
> {
>     ... first test ...
> }
>
> static test_foo_two(void)
> {
>     ... second test ...
> }
> ```
>
> ```generated.c
> static const struct test_func _test_foo_functions[] = {
>     {
>         name: "foo::one",
>         test: test_foo_one,
>         setup: test_foo_setup,
>         teardown: test_foo_teardown,
>     },
>     {
>         name: "foo::two",
>         test: test_foo_two,
>         setup: test_foo_setup,
>         teardown: test_foo_teardown,
>     },
> };
>
> int main(int argc, const char *argv[])
> {
>     ... boilerplate to execute all functions ...
> }
> ```
>
> The setup and teardown functions are optional -- if not defined for a
> specific test unit, then we simply won't invoke them.

So the name of a test is generated automatically based on the function
name and there is no way to add a description beyond that.  This would
achieve my goal above, but prevent developers from using space and
punctuation in test descriptions.  Fair enough.

> There is of course some magic involved with how we generate the file.

It requires magic function names and generates code using a different
language, while for_test is a just single new control flow keyword,
like the dozen or so we already have.  So the magic employed by the
libgit2 system is both broader and deeper.

> But I think that would be quite manageable, and ultimately all that the
> developer would need to care about is writing a `test_foo_something()`
> function. Everything else would be handled by our infra.

With for_test all the developer has to do is write a test with a
description, no extra infrastructure beyond the existing unit test
framework needed.

> I'd be happy to implement something along these lines if folks think
> that this is a good direction to go into.

FWIW I don't see the appeal.  It uses more code and more dependencies
to almost allow what for_test permits.

René
Patrick Steinhardt July 23, 2024, 9:53 a.m. UTC | #10
On Tue, Jul 23, 2024 at 11:25:29AM +0200, René Scharfe wrote:
> Am 23.07.24 um 08:36 schrieb Patrick Steinhardt:
> > There is of course some magic involved with how we generate the file.
> 
> It requires magic function names and generates code using a different
> language, while for_test is a just single new control flow keyword,
> like the dozen or so we already have.  So the magic employed by the
> libgit2 system is both broader and deeper.

It is broader, that's certainly true. But it feels more self-contained,
less fragile and easier to read to me compared to macros.

> > But I think that would be quite manageable, and ultimately all that the
> > developer would need to care about is writing a `test_foo_something()`
> > function. Everything else would be handled by our infra.
> 
> With for_test all the developer has to do is write a test with a
> description, no extra infrastructure beyond the existing unit test
> framework needed.

True, but it feels like it is an invitation for writing unidiomatic
code to me. Unidiomatic in this context to me mostly means code that is
not self-contained and thus cannot be run standalone.

These are quite obviously subjective opinions though.

Patrick
René Scharfe July 23, 2024, 12:37 p.m. UTC | #11
Am 23.07.24 um 11:53 schrieb Patrick Steinhardt:
> On Tue, Jul 23, 2024 at 11:25:29AM +0200, René Scharfe wrote:
>> Am 23.07.24 um 08:36 schrieb Patrick Steinhardt:
>>> There is of course some magic involved with how we generate the file.
>>
>> It requires magic function names and generates code using a different
>> language, while for_test is a just single new control flow keyword,
>> like the dozen or so we already have.  So the magic employed by the
>> libgit2 system is both broader and deeper.
>
> It is broader, that's certainly true. But it feels more self-contained,
> less fragile and easier to read to me compared to macros.

In which ways can for_test break?

>>> But I think that would be quite manageable, and ultimately all that the
>>> developer would need to care about is writing a `test_foo_something()`
>>> function. Everything else would be handled by our infra.
>>
>> With for_test all the developer has to do is write a test with a
>> description, no extra infrastructure beyond the existing unit test
>> framework needed.
>
> True, but it feels like it is an invitation for writing unidiomatic
> code to me. Unidiomatic in this context to me mostly means code that is
> not self-contained and thus cannot be run standalone.

Partly this is intentional -- I want to do something that the current
idiom (the TEST macro) doesn't allow.

It's possible that reducing friction will cause more sloppy tests to
be created.  I hope that the bad parts will stand out more when
there's less boilerplate code.

René
René Scharfe July 23, 2024, 12:37 p.m. UTC | #12
Am 23.07.24 um 00:47 schrieb Kyle Lippincott:
>
> If we have a good set of protections against misuse, does this mean
> we're free to rename it? :) The concern raised for why `for` had to be
> in the name was because it was using a control statement (a
> at-most-1-execution for loop) to achieve its magic, and the control
> statement puts certain restrictions and obligations on how to use it
> correctly. If the misuse is detected reliably, we can choose a name
> that's more descriptive about what it's doing.

Well, I called it "test" initially because I'm lazy and it describes
its purpose.

But the fact remains that this is an iteration statement, even though
it kinda looks like a function, and it should be used as such.  We have
to explicitly tell clang-format, and having "for" in the name reminds
developers as well.

There is a slight dissonance between it executing at most once and it
being an iteration.  In C we commonly use conditionals ("if") for that.
Calling it "if_test" would be misleading with "for" under the hood,
though, as you cannot have an "else" branch -- which we don't need
anyway.

That said, iterating zero or one times is possible and understandable
not too much of a stretch IMHO.  I can understand that one needs a
minute to get used to that new keyword, though.

René
Patrick Steinhardt July 23, 2024, 1 p.m. UTC | #13
On Tue, Jul 23, 2024 at 02:37:35PM +0200, René Scharfe wrote:
> Am 23.07.24 um 11:53 schrieb Patrick Steinhardt:
> > On Tue, Jul 23, 2024 at 11:25:29AM +0200, René Scharfe wrote:
> >> Am 23.07.24 um 08:36 schrieb Patrick Steinhardt:
> >>> There is of course some magic involved with how we generate the file.
> >>
> >> It requires magic function names and generates code using a different
> >> language, while for_test is a just single new control flow keyword,
> >> like the dozen or so we already have.  So the magic employed by the
> >> libgit2 system is both broader and deeper.
> >
> > It is broader, that's certainly true. But it feels more self-contained,
> > less fragile and easier to read to me compared to macros.
> 
> In which ways can for_test break?

I was mostly referring to the potential for empty-by-accident test
bodies that were mentioned in other parts of this thread.

Patrick
Phillip Wood July 23, 2024, 1:23 p.m. UTC | #14
On 23/07/2024 13:37, René Scharfe wrote:
> Am 23.07.24 um 11:53 schrieb Patrick Steinhardt:
>> On Tue, Jul 23, 2024 at 11:25:29AM +0200, René Scharfe wrote:
>>> Am 23.07.24 um 08:36 schrieb Patrick Steinhardt:
>>>> There is of course some magic involved with how we generate the file.
>>>
>>> It requires magic function names and generates code using a different
>>> language, while for_test is a just single new control flow keyword,
>>> like the dozen or so we already have.  So the magic employed by the
>>> libgit2 system is both broader and deeper.
>>
>> It is broader, that's certainly true. But it feels more self-contained,
>> less fragile and easier to read to me compared to macros.
> 
> In which ways can for_test break?

Using a "break" statement to exit the test early will exit the loop 
without calling test__run_end()

Best Wishes

Phillip

> 
>>>> But I think that would be quite manageable, and ultimately all that the
>>>> developer would need to care about is writing a `test_foo_something()`
>>>> function. Everything else would be handled by our infra.
>>>
>>> With for_test all the developer has to do is write a test with a
>>> description, no extra infrastructure beyond the existing unit test
>>> framework needed.
>>
>> True, but it feels like it is an invitation for writing unidiomatic
>> code to me. Unidiomatic in this context to me mostly means code that is
>> not self-contained and thus cannot be run standalone.
> 
> Partly this is intentional -- I want to do something that the current
> idiom (the TEST macro) doesn't allow.
> 
> It's possible that reducing friction will cause more sloppy tests to
> be created.  I hope that the bad parts will stand out more when
> there's less boilerplate code.
> 
> René
>
Phillip Wood July 23, 2024, 1:24 p.m. UTC | #15
On 22/07/2024 20:13, Kyle Lippincott wrote:
> On Sat, Jul 20, 2024 at 11:22 PM René Scharfe <l.s.r@web.de> wrote:
>>
>> The macro TEST only allows defining a test that consists of a single
>> expression.  Add a new macro, for_test, which provides a way to define
>> unit tests that are made up of one or more statements.
>>
>> for_test allows defining self-contained tests en bloc, a bit like
>> test_expect_success does for regular tests.  It acts like a for loop
>> that runs at most once; the test body is executed if test_skip_all()
>> had not been called before.
> 
> I can see based on this description where the name came from, but
> without this context, it's not clear when reading a test what it
> actually does. The name comes from an implementation detail, and is
> not describing what it _does_, just _how_ it does it.

That's my feeling too. In fact I think the name "for_test" actually 
works against us by implicitly suggesting that "break" can be used to 
exit the test early when in fact that will not work because we'll skip 
the call to test__run_end()

>> + * Run a test unless test_skip_all() has been called.  Acts like a for
>> + * loop that runs at most once, with the test description between the
>> + * parentheses and the test body as a statement or block after them.
>> + * The description for each test should be unique.  E.g.:
>> + *
>> + *  for_test ("something else %d %d", arg1, arg2) {
>> + *          prepare();
>> + *          test_something_else(arg1, arg2);
>> + *          cleanup();
>> + *  }
>> + */
>> +#define for_test(...)                                                  \
>> +       for (int for_test_running_ = test__run_begin() ?                \
>> +               (test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
>> +            for_test_running_;                                         \
>> +            test__run_end(1, TEST_LOCATION(), __VA_ARGS__),            \
>> +               for_test_running_ = 0)
> 
> IMHO: this is borderline "too much magic" for my tastes.

Yes, compared to TEST_RUN() in v1 the magic level has jumped from Muggle 
to Dumbledore. Using a for loop is clever as it ensures test__run_end() 
is called at the end of the test without the need for separate 
TEST_END()[1] macro. The alternative is something like 
test_if_enabled()[2] which was discussed in the last round.

[1] 
https://lore.kernel.org/git/4f41f509-1d44-4476-92b0-9bb643f64576@gmail.com
[2] https://lore.kernel.org/git/xmqqa5iot01s.fsf@gitster.g

> I think
> having multiple test functions is generally easier to understand, and
> the overhead isn't really relevant. It's not _as_ compact in the
> source file, and requires that we have both the TEST statement and the
> function (and forgetting the TEST statement means that we won't invoke
> the function). If that is the main issue we're facing here, I wonder
> if there's some other way of resolving that (such as unused function
> detection via some compiler flags; even if it's not detected on all
> platforms, getting at least one of the CI platforms should be
> sufficient to prevent the issue [but we should target as many as
> possible, so it's caught earlier than "I tried to send it to the
> list"])

I also have a preference for function based tests but I do think 
something like for_test() can be useful in certain situations. For 
example in test-ctype.c where testing the ctype macros leads to a lot of 
repetition or a giant macro with a function based approach. If isalpha() 
and friends were functions instead we could write a single helper 
function which is passed the function under test together with the input 
data and expect result. Because they are macros that approach does not work.

Another example is where we are using a helper function with several 
inputs and we would prefer to write

for_test("test 1") {
	int input[] = ...
	int expect[] = ...

	test_helper(input, expect);

...

for_test("test 10") {
	int input[] = ...
	int expect[] = ...

	test_helper(input, expect);
}

rather then declaring all the input and expect variables up front with

	int input_1 = ...
	int input_2 = ...
	...
	int expect_1 = ...
	int expect_2 = ...

	TEST(test_helper(input_1, expect_1), "test 1");
	...
	TEST(test_helper(input_10, expect_10), "test 10");

> If others agree that this is a good simplification for the people
> reading the test code (and hopefully for the test author), I'm fine
> with this going in (with a different name). I'm not trying to veto the
> concept.

That's my feeling too.

Best Wishes

Phillip

> 
>> +
>>   /*
>>    * Print a test plan, should be called before any tests. If the number
>>    * of tests is not known in advance test_done() will automatically
>> --
>> 2.45.2
>>
>
René Scharfe July 23, 2024, 1:58 p.m. UTC | #16
Am 23.07.24 um 15:23 schrieb Phillip Wood:
> On 23/07/2024 13:37, René Scharfe wrote:
>>
>> In which ways can for_test break?
>
> Using a "break" statement to exit the test early will exit the loop
> without calling test__run_end()
Good point!  That will only be caught at runtime by the asserts at the
start of test__run_begin() or test_done() depending on whether it was
the last test:

 Assertion failed: (!ctx.running), function test_done, file test-lib.c, line 128.

 Assertion failed: (!ctx.running), function test__run_begin, file test-lib.c, line 172.

These messages could be more helpful.  And the use of the unsupported
break with a first-class for-like keyword would yield a compiler error.
But at least it won't go unnoticed even with this limited look-alike.
Deserves a comment in test-lib.h.

René
Phillip Wood July 25, 2024, 9:45 a.m. UTC | #17
On 23/07/2024 14:24, Phillip Wood wrote:
> On 22/07/2024 20:13, Kyle Lippincott wrote:
>> On Sat, Jul 20, 2024 at 11:22 PM René Scharfe <l.s.r@web.de> wrote:
>>>
>> I think
>> having multiple test functions is generally easier to understand, and
>> the overhead isn't really relevant. It's not _as_ compact in the
>> source file, and requires that we have both the TEST statement and the
>> function (and forgetting the TEST statement means that we won't invoke
>> the function). If that is the main issue we're facing here, I wonder
>> if there's some other way of resolving that (such as unused function
>> detection via some compiler flags; even if it's not detected on all
>> platforms, getting at least one of the CI platforms should be
>> sufficient to prevent the issue [but we should target as many as
>> possible, so it's caught earlier than "I tried to send it to the
>> list"])
> 
> I also have a preference for function based tests but I do think 
> something like for_test() can be useful in certain situations.

I'm no-longer convinced that this is really the case

> For 
> example in test-ctype.c where testing the ctype macros leads to a lot of 
> repetition or a giant macro with a function based approach.

Having re-read the history of t/unit-tests/t-ctype.c I don't think the 
repetition in the original version was really that bad. The objection to 
it seems to have been that one had to write the class name (for example 
isalpha()) twice - once when defining the test function and again when 
calling it. I'm not sure that is really worth worrying about.

> Another example is where we are using a helper function with several 
> inputs and we would prefer to write
> 
> for_test("test 1") {
>      int input[] = ...
>      int expect[] = ...
> 
>      test_helper(input, expect);
> 
> ...
> 
> for_test("test 10") {
>      int input[] = ...
>      int expect[] = ...
> 
>      test_helper(input, expect);
> }

One can use blocks to achieve the same outcome with the TEST() macro

{
       int input[] = ...
       int expect[] = ...

       TEST(test_helper(input, expect), "test 1");
}

So overall I'm less convinced that adding something like for_test() is 
necessary and I'm very convinced that calling it for_test() and using 
"continue" to exit a test early is going to confuse contributors.

Best Wishes

Phillip

> rather then declaring all the input and expect variables up front with
> 
>      int input_1 = ...
>      int input_2 = ...
>      ...
>      int expect_1 = ...
>      int expect_2 = ...
> 
>      TEST(test_helper(input_1, expect_1), "test 1");
>      ...
>      TEST(test_helper(input_10, expect_10), "test 10");

>> If others agree that this is a good simplification for the people
>> reading the test code (and hopefully for the test author), I'm fine
>> with this going in (with a different name). I'm not trying to veto the
>> concept.
> 
> That's my feeling too.
> 
> Best Wishes
> 
> Phillip
> 
>>
>>> +
>>>   /*
>>>    * Print a test plan, should be called before any tests. If the number
>>>    * of tests is not known in advance test_done() will automatically
>>> -- 
>>> 2.45.2
>>>
>>
>
René Scharfe July 30, 2024, 2 p.m. UTC | #18
Am 25.07.24 um 11:45 schrieb Phillip Wood:
> On 23/07/2024 14:24, Phillip Wood wrote:
>
>> For example in test-ctype.c where testing the ctype macros leads to
>> a lot of repetition or a giant macro with a function based
>> approach.
>
> Having re-read the history of t/unit-tests/t-ctype.c I don't think
> the repetition in the original version was really that bad. The
> objection to it seems to have been that one had to write the class
> name (for example isalpha()) twice - once when defining the test
> function and again when calling it. I'm not sure that is really worth
> worrying about.

I can't see the purpose of requiring repetition like that.  It's an
unnecessary obstacle, small as it may be.  In the case of t-ctype it was
a regression to the original test helper code.  It's easy to avoid with
existing functions, except that those are off-limits to allow for future
changes.  So it's just a matter of packaging them nicely.

> So overall I'm less convinced that adding something like for_test()
> is necessary and I'm very convinced that calling it for_test() and
> using "continue" to exit a test early is going to confuse
> contributors.

I didn't expect anybody would want to use continue or break in a test,
but that's probably naive.  I got carried away there by the prospect
of a trivial macro-only solution, but alas, it's too clunky.  Two steps
back..

René
diff mbox series

Patch

diff --git a/.clang-format b/.clang-format
index 6408251577..863dc87dfc 100644
--- a/.clang-format
+++ b/.clang-format
@@ -151,6 +151,7 @@  Cpp11BracedListStyle: false
 # function calls. Taken from:
 #   git grep -h '^#define [^[:space:]]*for_\?each[^[:space:]]*(' |
 #   sed "s/^#define /  - '/; s/(.*$/'/" | sort | uniq
+# Added for_test from t/unit-tests/test-lib.h manually as a special case.
 ForEachMacros:
   - 'for_each_builtin'
   - 'for_each_string_list_item'
@@ -168,6 +169,7 @@  ForEachMacros:
   - 'strintmap_for_each_entry'
   - 'strmap_for_each_entry'
   - 'strset_for_each_entry'
+  - 'for_test'

 # The maximum number of consecutive empty lines to keep.
 MaxEmptyLinesToKeep: 1
diff --git a/t/helper/test-example-tap.c b/t/helper/test-example-tap.c
index d072ad559f..51d5e6e75b 100644
--- a/t/helper/test-example-tap.c
+++ b/t/helper/test-example-tap.c
@@ -92,5 +92,38 @@  int cmd__example_tap(int argc, const char **argv)
 	test_res = TEST(t_empty(), "test with no checks");
 	TEST(check_int(test_res, ==, 0), "test with no checks returns 0");

+	for_test ("for_test passing test")
+		check_int(1, ==, 1);
+	for_test ("for_test failing test")
+		check_int(1, ==, 2);
+	for_test ("for_test passing TEST_TODO()")
+		TEST_TODO(check(0));
+	for_test ("for_test failing TEST_TODO()")
+		TEST_TODO(check(1));
+	for_test ("for_test test_skip()") {
+		check(0);
+		test_skip("missing prerequisite");
+		check(1);
+	}
+	for_test ("for_test test_skip() inside TEST_TODO()")
+		TEST_TODO((test_skip("missing prerequisite"), 1));
+	for_test ("for_test TEST_TODO() after failing check") {
+		check(0);
+		TEST_TODO(check(0));
+	}
+	for_test ("for_test failing check after TEST_TODO()") {
+		check(1);
+		TEST_TODO(check(0));
+		check(0);
+	}
+	for_test ("for_test messages from failing string and char comparison") {
+		check_str("\thello\\", "there\"\n");
+		check_str("NULL", NULL);
+		check_char('a', ==, '\n');
+		check_char('\\', ==, '\'');
+	}
+	for_test ("for_test test with no checks")
+		; /* nothing */
+
 	return test_done();
 }
diff --git a/t/t0080/expect b/t/t0080/expect
index 0cfa0dc6d8..583f41b8c9 100644
--- a/t/t0080/expect
+++ b/t/t0080/expect
@@ -40,4 +40,37 @@  not ok 17 - messages from failing string and char comparison
 # BUG: test has no checks at t/helper/test-example-tap.c:92
 not ok 18 - test with no checks
 ok 19 - test with no checks returns 0
-1..19
+ok 20 - for_test passing test
+# check "1 == 2" failed at t/helper/test-example-tap.c:98
+#    left: 1
+#   right: 2
+not ok 21 - for_test failing test
+not ok 22 - for_test passing TEST_TODO() # TODO
+# todo check 'check(1)' succeeded at t/helper/test-example-tap.c:102
+not ok 23 - for_test failing TEST_TODO()
+# check "0" failed at t/helper/test-example-tap.c:104
+# skipping test - missing prerequisite
+# skipping check '1' at t/helper/test-example-tap.c:106
+ok 24 - for_test test_skip() # SKIP
+# skipping test - missing prerequisite
+ok 25 - for_test test_skip() inside TEST_TODO() # SKIP
+# check "0" failed at t/helper/test-example-tap.c:111
+not ok 26 - for_test TEST_TODO() after failing check
+# check "0" failed at t/helper/test-example-tap.c:117
+not ok 27 - for_test failing check after TEST_TODO()
+# check "!strcmp("\thello\\", "there\"\n")" failed at t/helper/test-example-tap.c:120
+#    left: "\011hello\\"
+#   right: "there\"\012"
+# check "!strcmp("NULL", NULL)" failed at t/helper/test-example-tap.c:121
+#    left: "NULL"
+#   right: NULL
+# check "'a' == '\n'" failed at t/helper/test-example-tap.c:122
+#    left: 'a'
+#   right: '\012'
+# check "'\\' == '\''" failed at t/helper/test-example-tap.c:123
+#    left: '\\'
+#   right: '\''
+not ok 28 - for_test messages from failing string and char comparison
+# BUG: test has no checks at t/helper/test-example-tap.c:125
+not ok 29 - for_test test with no checks
+1..29
diff --git a/t/unit-tests/test-lib.h b/t/unit-tests/test-lib.h
index 2de6d715d5..12afd47ac9 100644
--- a/t/unit-tests/test-lib.h
+++ b/t/unit-tests/test-lib.h
@@ -14,6 +14,25 @@ 
 	test__run_end(test__run_begin() ? 0 : (t, 1),	\
 		      TEST_LOCATION(),  __VA_ARGS__)

+/*
+ * Run a test unless test_skip_all() has been called.  Acts like a for
+ * loop that runs at most once, with the test description between the
+ * parentheses and the test body as a statement or block after them.
+ * The description for each test should be unique.  E.g.:
+ *
+ *  for_test ("something else %d %d", arg1, arg2) {
+ *          prepare();
+ *          test_something_else(arg1, arg2);
+ *          cleanup();
+ *  }
+ */
+#define for_test(...)							\
+	for (int for_test_running_ = test__run_begin() ?		\
+		(test__run_end(0, TEST_LOCATION(), __VA_ARGS__), 0) : 1;\
+	     for_test_running_;						\
+	     test__run_end(1, TEST_LOCATION(), __VA_ARGS__),		\
+		for_test_running_ = 0)
+
 /*
  * Print a test plan, should be called before any tests. If the number
  * of tests is not known in advance test_done() will automatically