diff mbox series

[RFC,v4,08/17] kunit: test: add support for test abort

Message ID 20190214213729.21702-9-brendanhiggins@google.com (mailing list archive)
State Superseded, archived
Headers show
Series kunit: introduce KUnit, the Linux kernel unit testing framework | expand

Commit Message

Brendan Higgins Feb. 14, 2019, 9:37 p.m. UTC
Add support for aborting/bailing out of test cases. Needed for
implementing assertions.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
---
Changes Since Last Version
 - This patch is new introducing a new cross-architecture way to abort
   out of a test case (needed for KUNIT_ASSERT_*, see next patch for
   details).
 - On a side note, this is not a complete replacement for the UML abort
   mechanism, but covers the majority of necessary functionality. UML
   architecture specific featurs have been dropped from the initial
   patchset.
---
 include/kunit/test.h |  24 +++++
 kunit/Makefile       |   3 +-
 kunit/test-test.c    | 127 ++++++++++++++++++++++++++
 kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 353 insertions(+), 9 deletions(-)
 create mode 100644 kunit/test-test.c

Comments

Frank Rowand Feb. 18, 2019, 7:52 p.m. UTC | #1
On 2/14/19 1:37 PM, Brendan Higgins wrote:
> Add support for aborting/bailing out of test cases. Needed for
> implementing assertions.
> 
> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> ---
> Changes Since Last Version
>  - This patch is new introducing a new cross-architecture way to abort
>    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
>    details).
>  - On a side note, this is not a complete replacement for the UML abort
>    mechanism, but covers the majority of necessary functionality. UML
>    architecture specific featurs have been dropped from the initial
>    patchset.
> ---
>  include/kunit/test.h |  24 +++++
>  kunit/Makefile       |   3 +-
>  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
>  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
>  4 files changed, 353 insertions(+), 9 deletions(-)
>  create mode 100644 kunit/test-test.c

< snip >

> diff --git a/kunit/test.c b/kunit/test.c
> index d18c50d5ed671..6e5244642ab07 100644
> --- a/kunit/test.c
> +++ b/kunit/test.c
> @@ -6,9 +6,9 @@
>   * Author: Brendan Higgins <brendanhiggins@google.com>
>   */
>  
> -#include <linux/sched.h>
>  #include <linux/sched/debug.h>
> -#include <os.h>
> +#include <linux/completion.h>
> +#include <linux/kthread.h>
>  #include <kunit/test.h>
>  
>  static bool kunit_get_success(struct kunit *test)
> @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
>  	spin_unlock_irqrestore(&test->lock, flags);
>  }
>  
> +static bool kunit_get_death_test(struct kunit *test)
> +{
> +	unsigned long flags;
> +	bool death_test;
> +
> +	spin_lock_irqsave(&test->lock, flags);
> +	death_test = test->death_test;
> +	spin_unlock_irqrestore(&test->lock, flags);
> +
> +	return death_test;
> +}
> +
> +static void kunit_set_death_test(struct kunit *test, bool death_test)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&test->lock, flags);
> +	test->death_test = death_test;
> +	spin_unlock_irqrestore(&test->lock, flags);
> +}
> +
>  static int kunit_vprintk_emit(const struct kunit *test,
>  			      int level,
>  			      const char *fmt,
> @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
>  	stream->commit(stream);
>  }
>  
> +static void __noreturn kunit_abort(struct kunit *test)
> +{
> +	kunit_set_death_test(test, true);
> +
> +	test->try_catch.throw(&test->try_catch);
> +
> +	/*
> +	 * Throw could not abort from test.
> +	 */
> +	kunit_err(test, "Throw could not abort from test!");
> +	show_stack(NULL, NULL);
> +	BUG();

kunit_abort() is what will be call as the result of an assert failure.

BUG(), which is a panic, which is crashing the system is not acceptable
in the Linux kernel.  You will just annoy Linus if you submit this.

-Frank

< snip >
Brendan Higgins Feb. 20, 2019, 3:39 a.m. UTC | #2
On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com> wrote:
>
> On 2/14/19 1:37 PM, Brendan Higgins wrote:
> > Add support for aborting/bailing out of test cases. Needed for
> > implementing assertions.
> >
> > Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> > ---
> > Changes Since Last Version
> >  - This patch is new introducing a new cross-architecture way to abort
> >    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
> >    details).
> >  - On a side note, this is not a complete replacement for the UML abort
> >    mechanism, but covers the majority of necessary functionality. UML
> >    architecture specific featurs have been dropped from the initial
> >    patchset.
> > ---
> >  include/kunit/test.h |  24 +++++
> >  kunit/Makefile       |   3 +-
> >  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
> >  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
> >  4 files changed, 353 insertions(+), 9 deletions(-)
> >  create mode 100644 kunit/test-test.c
>
> < snip >
>
> > diff --git a/kunit/test.c b/kunit/test.c
> > index d18c50d5ed671..6e5244642ab07 100644
> > --- a/kunit/test.c
> > +++ b/kunit/test.c
> > @@ -6,9 +6,9 @@
> >   * Author: Brendan Higgins <brendanhiggins@google.com>
> >   */
> >
> > -#include <linux/sched.h>
> >  #include <linux/sched/debug.h>
> > -#include <os.h>
> > +#include <linux/completion.h>
> > +#include <linux/kthread.h>
> >  #include <kunit/test.h>
> >
> >  static bool kunit_get_success(struct kunit *test)
> > @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
> >       spin_unlock_irqrestore(&test->lock, flags);
> >  }
> >
> > +static bool kunit_get_death_test(struct kunit *test)
> > +{
> > +     unsigned long flags;
> > +     bool death_test;
> > +
> > +     spin_lock_irqsave(&test->lock, flags);
> > +     death_test = test->death_test;
> > +     spin_unlock_irqrestore(&test->lock, flags);
> > +
> > +     return death_test;
> > +}
> > +
> > +static void kunit_set_death_test(struct kunit *test, bool death_test)
> > +{
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&test->lock, flags);
> > +     test->death_test = death_test;
> > +     spin_unlock_irqrestore(&test->lock, flags);
> > +}
> > +
> >  static int kunit_vprintk_emit(const struct kunit *test,
> >                             int level,
> >                             const char *fmt,
> > @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
> >       stream->commit(stream);
> >  }
> >
> > +static void __noreturn kunit_abort(struct kunit *test)
> > +{
> > +     kunit_set_death_test(test, true);
> > +
> > +     test->try_catch.throw(&test->try_catch);
> > +
> > +     /*
> > +      * Throw could not abort from test.
> > +      */
> > +     kunit_err(test, "Throw could not abort from test!");
> > +     show_stack(NULL, NULL);
> > +     BUG();
>
> kunit_abort() is what will be call as the result of an assert failure.

Yep. Does that need clarified somewhere?

>
> BUG(), which is a panic, which is crashing the system is not acceptable
> in the Linux kernel.  You will just annoy Linus if you submit this.

Sorry, I thought this was an acceptable use case since, a) this should
never be compiled in a production kernel, b) we are in a pretty bad,
unpredictable state if we get here and keep going. I think you might
have said elsewhere that you think "a" is not valid? In any case, I
can replace this with a WARN, would that be acceptable?
Frank Rowand Feb. 20, 2019, 6:44 a.m. UTC | #3
On 2/19/19 7:39 PM, Brendan Higgins wrote:
> On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com> wrote:
>>
>> On 2/14/19 1:37 PM, Brendan Higgins wrote:
>>> Add support for aborting/bailing out of test cases. Needed for
>>> implementing assertions.
>>>
>>> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
>>> ---
>>> Changes Since Last Version
>>>  - This patch is new introducing a new cross-architecture way to abort
>>>    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
>>>    details).
>>>  - On a side note, this is not a complete replacement for the UML abort
>>>    mechanism, but covers the majority of necessary functionality. UML
>>>    architecture specific featurs have been dropped from the initial
>>>    patchset.
>>> ---
>>>  include/kunit/test.h |  24 +++++
>>>  kunit/Makefile       |   3 +-
>>>  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
>>>  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
>>>  4 files changed, 353 insertions(+), 9 deletions(-)
>>>  create mode 100644 kunit/test-test.c
>>
>> < snip >
>>
>>> diff --git a/kunit/test.c b/kunit/test.c
>>> index d18c50d5ed671..6e5244642ab07 100644
>>> --- a/kunit/test.c
>>> +++ b/kunit/test.c
>>> @@ -6,9 +6,9 @@
>>>   * Author: Brendan Higgins <brendanhiggins@google.com>
>>>   */
>>>
>>> -#include <linux/sched.h>
>>>  #include <linux/sched/debug.h>
>>> -#include <os.h>
>>> +#include <linux/completion.h>
>>> +#include <linux/kthread.h>
>>>  #include <kunit/test.h>
>>>
>>>  static bool kunit_get_success(struct kunit *test)
>>> @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
>>>       spin_unlock_irqrestore(&test->lock, flags);
>>>  }
>>>
>>> +static bool kunit_get_death_test(struct kunit *test)
>>> +{
>>> +     unsigned long flags;
>>> +     bool death_test;
>>> +
>>> +     spin_lock_irqsave(&test->lock, flags);
>>> +     death_test = test->death_test;
>>> +     spin_unlock_irqrestore(&test->lock, flags);
>>> +
>>> +     return death_test;
>>> +}
>>> +
>>> +static void kunit_set_death_test(struct kunit *test, bool death_test)
>>> +{
>>> +     unsigned long flags;
>>> +
>>> +     spin_lock_irqsave(&test->lock, flags);
>>> +     test->death_test = death_test;
>>> +     spin_unlock_irqrestore(&test->lock, flags);
>>> +}
>>> +
>>>  static int kunit_vprintk_emit(const struct kunit *test,
>>>                             int level,
>>>                             const char *fmt,
>>> @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
>>>       stream->commit(stream);
>>>  }
>>>
>>> +static void __noreturn kunit_abort(struct kunit *test)
>>> +{
>>> +     kunit_set_death_test(test, true);
>>> +
>>> +     test->try_catch.throw(&test->try_catch);
>>> +
>>> +     /*
>>> +      * Throw could not abort from test.
>>> +      */
>>> +     kunit_err(test, "Throw could not abort from test!");
>>> +     show_stack(NULL, NULL);
>>> +     BUG();
>>
>> kunit_abort() is what will be call as the result of an assert failure.
> 
> Yep. Does that need clarified somewhere.
>>
>> BUG(), which is a panic, which is crashing the system is not acceptable
>> in the Linux kernel.  You will just annoy Linus if you submit this.
> 
> Sorry, I thought this was an acceptable use case since, a) this should
> never be compiled in a production kernel, b) we are in a pretty bad,
> unpredictable state if we get here and keep going. I think you might
> have said elsewhere that you think "a" is not valid? In any case, I
> can replace this with a WARN, would that be acceptable?

A WARN may or may not make sense, depending on the context.  It may
be sufficient to simply report a test failure (as in the old version
of case (2) below.

Answers to "a)" and "b)":

a) it might be in a production kernel

a') it is not acceptable in my development kernel either

b) No.  You don't crash a developer's kernel either unless it is
required to avoid data corruption.

b') And you can not do replacements like:

(1) in of_unittest_check_tree_linkage()

-----  old  -----

        if (!of_root)
                return;

-----  new  -----

        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, of_root);

(2) in of_unittest_property_string()

-----  old  -----

        /* of_property_read_string_index() tests */
        rc = of_property_read_string_index(np, "string-property", 0, strings);
        unittest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);

-----  new  -----

        /* of_property_read_string_index() tests */
        rc = of_property_read_string_index(np, "string-property", 0, strings);
        KUNIT_ASSERT_EQ(test, rc, 0);
        KUNIT_EXPECT_STREQ(test, strings[0], "foobar");


If a test fails, that is no reason to abort testing.  The remainder of the unit
tests can still run.  There may be cascading failures, but that is ok.
Stephen Boyd Feb. 26, 2019, 8:35 p.m. UTC | #4
Quoting Brendan Higgins (2019-02-14 13:37:20)
> Add support for aborting/bailing out of test cases. Needed for
> implementing assertions.

Can you add some more text here with the motivating reasons for
implementing assertions and bailing out of test cases?

For example, I wonder why unit tests can't just return with a failure
when they need to abort and then the test runner would detect that error
via the return value from the 'run test' function. That would be a more
direct approach, but also more verbose than a single KUNIT_ASSERT()
line. It would be more kernel idiomatic too because the control flow
isn't hidden inside a macro and it isn't intimately connected with
kthreads and completions.

> 
> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
[...]
> diff --git a/kunit/test-test.c b/kunit/test-test.c
> new file mode 100644
> index 0000000000000..a936c041f1c8f

Could this whole file be another patch? It seems to be a test for the
try catch mechanism.

> diff --git a/kunit/test.c b/kunit/test.c
> index d18c50d5ed671..6e5244642ab07 100644
> --- a/kunit/test.c
> +++ b/kunit/test.c
[...]
> +
> +static void kunit_generic_throw(struct kunit_try_catch *try_catch)
> +{
> +       try_catch->context.try_result = -EFAULT;
> +       complete_and_exit(try_catch->context.try_completion, -EFAULT);
> +}
> +
> +static int kunit_generic_run_threadfn_adapter(void *data)
> +{
> +       struct kunit_try_catch *try_catch = data;
>  
> +       try_catch->try(&try_catch->context);
> +
> +       complete_and_exit(try_catch->context.try_completion, 0);

The exit code doesn't matter, right? If so, it might be clearer to just
return 0 from this function because kthreads exit themselves as far as I
recall.

> +}
> +
> +static void kunit_generic_run_try_catch(struct kunit_try_catch *try_catch)
> +{
> +       struct task_struct *task_struct;
> +       struct kunit *test = try_catch->context.test;
> +       int exit_code, wake_result;
> +       DECLARE_COMPLETION(test_case_completion);

DECLARE_COMPLETION_ONSTACK()?

> +
> +       try_catch->context.try_completion = &test_case_completion;
> +       try_catch->context.try_result = 0;
> +       task_struct = kthread_create(kunit_generic_run_threadfn_adapter,
> +                                            try_catch,
> +                                            "kunit_try_catch_thread");
> +       if (IS_ERR_OR_NULL(task_struct)) {

It looks like NULL is never returned from kthread_create(), so don't
check for it here.

> +               try_catch->catch(&try_catch->context);
> +               return;
> +       }
> +
> +       wake_result = wake_up_process(task_struct);
> +       if (wake_result != 0 && wake_result != 1) {

These are the only two possible return values of wake_up_process(), so
why not just use kthread_run() and check for an error on the process
creation?

> +               kunit_err(test, "task was not woken properly: %d", wake_result);
> +               try_catch->catch(&try_catch->context);
> +       }
> +
> +       /*
> +        * TODO(brendanhiggins@google.com): We should probably have some type of
> +        * timeout here. The only question is what that timeout value should be.
> +        *
> +        * The intention has always been, at some point, to be able to label
> +        * tests with some type of size bucket (unit/small, integration/medium,
> +        * large/system/end-to-end, etc), where each size bucket would get a
> +        * default timeout value kind of like what Bazel does:
> +        * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size
> +        * There is still some debate to be had on exactly how we do this. (For
> +        * one, we probably want to have some sort of test runner level
> +        * timeout.)
> +        *
> +        * For more background on this topic, see:
> +        * https://mike-bland.com/2011/11/01/small-medium-large.html
> +        */
> +       wait_for_completion(&test_case_completion);

It doesn't seem like a bad idea to make this have some arbitrarily large
timeout value to start with. Just to make sure the whole thing doesn't
get wedged. It's a timeout, so in theory it should never trigger if it's
large enough.

> +
> +       exit_code = try_catch->context.try_result;
> +       if (exit_code == -EFAULT)
> +               try_catch->catch(&try_catch->context);
> +       else if (exit_code == -EINTR)
> +               kunit_err(test, "wake_up_process() was never called.");

Does kunit_err() add newlines? It would be good to stick to the rest of
kernel style (printk, tracing, etc.) and rely on the callers to have the
newlines they want. Also, remove the full-stop because it isn't really
necessary to have those in error logs.

> +       else if (exit_code)
> +               kunit_err(test, "Unknown error: %d", exit_code);
> +}
> +
> +void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch)
> +{
> +       try_catch->run = kunit_generic_run_try_catch;

Is the idea that 'run' would be anything besides
'kunit_generic_run_try_catch'? If it isn't going to be different, then
maybe it's simpler to just have a function like
kunit_generic_run_try_catch() that is called by the unit tests instead
of having to write 'try_catch->run(try_catch)' and indirect for the
basic case. Maybe I've missed the point entirely though and this is all
scaffolding for more complicated exception handling later on.

> +       try_catch->throw = kunit_generic_throw;
> +}
> +
> +void __weak kunit_try_catch_init(struct kunit_try_catch *try_catch)
> +{
> +       kunit_generic_try_catch_init(try_catch);
> +}
> +
> +static void kunit_try_run_case(struct kunit_try_catch_context *context)
> +{
> +       struct kunit_try_catch_context *ctx = context;
> +       struct kunit *test = ctx->test;
> +       struct kunit_module *module = ctx->module;
> +       struct kunit_case *test_case = ctx->test_case;
> +
> +       /*
> +        * kunit_run_case_internal may encounter a fatal error; if it does, we
> +        * will jump to ENTER_HANDLER above instead of continuing normal control

Where is 'ENTER_HANDLER' above?

> +        * flow.
> +        */
>         kunit_run_case_internal(test, module, test_case);
> +       /* This line may never be reached. */
>         kunit_run_case_cleanup(test, module, test_case);
> +}
Brendan Higgins Feb. 28, 2019, 7:42 a.m. UTC | #5
On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com> wrote:
>
> On 2/19/19 7:39 PM, Brendan Higgins wrote:
> > On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com> wrote:
> >>
> >> On 2/14/19 1:37 PM, Brendan Higgins wrote:
> >>> Add support for aborting/bailing out of test cases. Needed for
> >>> implementing assertions.
> >>>
> >>> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> >>> ---
> >>> Changes Since Last Version
> >>>  - This patch is new introducing a new cross-architecture way to abort
> >>>    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
> >>>    details).
> >>>  - On a side note, this is not a complete replacement for the UML abort
> >>>    mechanism, but covers the majority of necessary functionality. UML
> >>>    architecture specific featurs have been dropped from the initial
> >>>    patchset.
> >>> ---
> >>>  include/kunit/test.h |  24 +++++
> >>>  kunit/Makefile       |   3 +-
> >>>  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
> >>>  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
> >>>  4 files changed, 353 insertions(+), 9 deletions(-)
> >>>  create mode 100644 kunit/test-test.c
> >>
> >> < snip >
> >>
> >>> diff --git a/kunit/test.c b/kunit/test.c
> >>> index d18c50d5ed671..6e5244642ab07 100644
> >>> --- a/kunit/test.c
> >>> +++ b/kunit/test.c
> >>> @@ -6,9 +6,9 @@
> >>>   * Author: Brendan Higgins <brendanhiggins@google.com>
> >>>   */
> >>>
> >>> -#include <linux/sched.h>
> >>>  #include <linux/sched/debug.h>
> >>> -#include <os.h>
> >>> +#include <linux/completion.h>
> >>> +#include <linux/kthread.h>
> >>>  #include <kunit/test.h>
> >>>
> >>>  static bool kunit_get_success(struct kunit *test)
> >>> @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
> >>>       spin_unlock_irqrestore(&test->lock, flags);
> >>>  }
> >>>
> >>> +static bool kunit_get_death_test(struct kunit *test)
> >>> +{
> >>> +     unsigned long flags;
> >>> +     bool death_test;
> >>> +
> >>> +     spin_lock_irqsave(&test->lock, flags);
> >>> +     death_test = test->death_test;
> >>> +     spin_unlock_irqrestore(&test->lock, flags);
> >>> +
> >>> +     return death_test;
> >>> +}
> >>> +
> >>> +static void kunit_set_death_test(struct kunit *test, bool death_test)
> >>> +{
> >>> +     unsigned long flags;
> >>> +
> >>> +     spin_lock_irqsave(&test->lock, flags);
> >>> +     test->death_test = death_test;
> >>> +     spin_unlock_irqrestore(&test->lock, flags);
> >>> +}
> >>> +
> >>>  static int kunit_vprintk_emit(const struct kunit *test,
> >>>                             int level,
> >>>                             const char *fmt,
> >>> @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
> >>>       stream->commit(stream);
> >>>  }
> >>>
> >>> +static void __noreturn kunit_abort(struct kunit *test)
> >>> +{
> >>> +     kunit_set_death_test(test, true);
> >>> +
> >>> +     test->try_catch.throw(&test->try_catch);
> >>> +
> >>> +     /*
> >>> +      * Throw could not abort from test.
> >>> +      */
> >>> +     kunit_err(test, "Throw could not abort from test!");
> >>> +     show_stack(NULL, NULL);
> >>> +     BUG();
> >>
> >> kunit_abort() is what will be call as the result of an assert failure.
> >
> > Yep. Does that need clarified somewhere.
> >>
> >> BUG(), which is a panic, which is crashing the system is not acceptable
> >> in the Linux kernel.  You will just annoy Linus if you submit this.
> >
> > Sorry, I thought this was an acceptable use case since, a) this should
> > never be compiled in a production kernel, b) we are in a pretty bad,
> > unpredictable state if we get here and keep going. I think you might
> > have said elsewhere that you think "a" is not valid? In any case, I
> > can replace this with a WARN, would that be acceptable?
>
> A WARN may or may not make sense, depending on the context.  It may
> be sufficient to simply report a test failure (as in the old version
> of case (2) below.
>
> Answers to "a)" and "b)":
>
> a) it might be in a production kernel

Sorry for a possibly stupid question, how might it be so? Why would
someone intentionally build unit tests into a production kernel?

>
> a') it is not acceptable in my development kernel either

Fair enough.

>
> b) No.  You don't crash a developer's kernel either unless it is
> required to avoid data corruption.

Alright, I thought that was one of those cases, but I am not going to
push the point. Also, in case it wasn't clear, the path where BUG()
gets called only happens if there is a bug in KUnit itself, not just
because a test case fails catastrophically.

>
> b') And you can not do replacements like:
>
> (1) in of_unittest_check_tree_linkage()
>
> -----  old  -----
>
>         if (!of_root)
>                 return;
>
> -----  new  -----
>
>         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, of_root);
>
> (2) in of_unittest_property_string()
>
> -----  old  -----
>
>         /* of_property_read_string_index() tests */
>         rc = of_property_read_string_index(np, "string-property", 0, strings);
>         unittest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);
>
> -----  new  -----
>
>         /* of_property_read_string_index() tests */
>         rc = of_property_read_string_index(np, "string-property", 0, strings);
>         KUNIT_ASSERT_EQ(test, rc, 0);
>         KUNIT_EXPECT_STREQ(test, strings[0], "foobar");
>
>
> If a test fails, that is no reason to abort testing.  The remainder of the unit
> tests can still run.  There may be cascading failures, but that is ok.

Sure, that's what I am trying to do. I don't see how (1) changes
anything, a failed KUNIT_ASSERT_* only bails on the current test case,
it does not quit the entire test suite let alone crash the kernel.

In case it wasn't clear above,
> >>> +     test->try_catch.throw(&test->try_catch);
should never, ever return. The only time it would, would be if KUnit
was very broken. This should never actually happen, even if the
assertion that called it was violated. KUNIT_ASSERT_* just says, "this
is a prerequisite property for this test case"; if it is violated, the
test case should fail and bail because the preconditions for the test
case cannot be satisfied. Nevertheless, other tests cases will still
run.
Brendan Higgins Feb. 28, 2019, 9:03 a.m. UTC | #6
On Tue, Feb 26, 2019 at 12:35 PM Stephen Boyd <sboyd@kernel.org> wrote:
>
> Quoting Brendan Higgins (2019-02-14 13:37:20)
> > Add support for aborting/bailing out of test cases. Needed for
> > implementing assertions.
>
> Can you add some more text here with the motivating reasons for
> implementing assertions and bailing out of test cases?

Sure. Yeah, this comes before the commit that adds assertions, so I
should probably put a better explanation here.
>
> For example, I wonder why unit tests can't just return with a failure

Well, you could. You can just do the check as you would without KUnit,
except call KUNIT_FAIL(...) before you return. For example, instead
of:

KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);

you could do:

if (IS_ERR_OR_NULL(ptr)) {
        KUNIT_FAIL(test, "ptr is an errno or null: %ld", ptr);
        return;
}

> when they need to abort and then the test runner would detect that error
> via the return value from the 'run test' function. That would be a more
> direct approach, but also more verbose than a single KUNIT_ASSERT()
> line. It would be more kernel idiomatic too because the control flow

Yeah, I was intentionally going against that idiom. I think that idiom
makes test logic more complicated than it needs to be, especially if
the assertion failure happens in a helper function; then you have to
pass that error all the way back up. It is important that test code
should be as simple as possible to the point of being immediately
obviously correct at first glance because there are no tests for
tests.

The idea with assertions is that you use them to state all the
preconditions for your test. Logically speaking, these are the
premises of the test case, so if a premise isn't true, there is no
point in continuing the test case because there are no conclusions
that can be drawn without the premises. Whereas, the expectation is
the thing you are trying to prove. It is not used universally in
x-unit style test frameworks, but I really like it as a convention.
You could still express the idea of a premise using the above idiom,
but I think KUNIT_ASSERT_* states the intended idea perfectly.

> isn't hidden inside a macro and it isn't intimately connected with
> kthreads and completions.

Yeah, I wasn't a fan of that myself, but it was broadly available. My
previous version (still the architecture specific version for UML, not
in this patchset though) relies on UML_LONGJMP, but is obviously only
works on UML. A number of people wanted support for other
architectures. Rob and Luis specifically wanted me to provide a
version of abort that would work on any architecture, even if it only
had reduced functionality; I thought this fit the bill okay.

>
> >
> > Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> [...]
> > diff --git a/kunit/test-test.c b/kunit/test-test.c
> > new file mode 100644
> > index 0000000000000..a936c041f1c8f
>
> Could this whole file be another patch? It seems to be a test for the
> try catch mechanism.

Sure.

>
> > diff --git a/kunit/test.c b/kunit/test.c
> > index d18c50d5ed671..6e5244642ab07 100644
> > --- a/kunit/test.c
> > +++ b/kunit/test.c
> [...]
> > +
> > +static void kunit_generic_throw(struct kunit_try_catch *try_catch)
> > +{
> > +       try_catch->context.try_result = -EFAULT;
> > +       complete_and_exit(try_catch->context.try_completion, -EFAULT);
> > +}
> > +
> > +static int kunit_generic_run_threadfn_adapter(void *data)
> > +{
> > +       struct kunit_try_catch *try_catch = data;
> >
> > +       try_catch->try(&try_catch->context);
> > +
> > +       complete_and_exit(try_catch->context.try_completion, 0);
>
> The exit code doesn't matter, right? If so, it might be clearer to just
> return 0 from this function because kthreads exit themselves as far as I
> recall.

You mean complete and then return?

>
> > +}
> > +
> > +static void kunit_generic_run_try_catch(struct kunit_try_catch *try_catch)
> > +{
> > +       struct task_struct *task_struct;
> > +       struct kunit *test = try_catch->context.test;
> > +       int exit_code, wake_result;
> > +       DECLARE_COMPLETION(test_case_completion);
>
> DECLARE_COMPLETION_ONSTACK()?

Whoops, yeah, that one.

>
> > +
> > +       try_catch->context.try_completion = &test_case_completion;
> > +       try_catch->context.try_result = 0;
> > +       task_struct = kthread_create(kunit_generic_run_threadfn_adapter,
> > +                                            try_catch,
> > +                                            "kunit_try_catch_thread");
> > +       if (IS_ERR_OR_NULL(task_struct)) {
>
> It looks like NULL is never returned from kthread_create(), so don't
> check for it here.

Bad habit, sorry.

>
> > +               try_catch->catch(&try_catch->context);
> > +               return;
> > +       }
> > +
> > +       wake_result = wake_up_process(task_struct);
> > +       if (wake_result != 0 && wake_result != 1) {
>
> These are the only two possible return values of wake_up_process(), so
> why not just use kthread_run() and check for an error on the process
> creation?

Good point, I am not sure why I did that.

>
> > +               kunit_err(test, "task was not woken properly: %d", wake_result);
> > +               try_catch->catch(&try_catch->context);
> > +       }
> > +
> > +       /*
> > +        * TODO(brendanhiggins@google.com): We should probably have some type of
> > +        * timeout here. The only question is what that timeout value should be.
> > +        *
> > +        * The intention has always been, at some point, to be able to label
> > +        * tests with some type of size bucket (unit/small, integration/medium,
> > +        * large/system/end-to-end, etc), where each size bucket would get a
> > +        * default timeout value kind of like what Bazel does:
> > +        * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size
> > +        * There is still some debate to be had on exactly how we do this. (For
> > +        * one, we probably want to have some sort of test runner level
> > +        * timeout.)
> > +        *
> > +        * For more background on this topic, see:
> > +        * https://mike-bland.com/2011/11/01/small-medium-large.html
> > +        */
> > +       wait_for_completion(&test_case_completion);
>
> It doesn't seem like a bad idea to make this have some arbitrarily large
> timeout value to start with. Just to make sure the whole thing doesn't
> get wedged. It's a timeout, so in theory it should never trigger if it's
> large enough.

Seems reasonable.

>
> > +
> > +       exit_code = try_catch->context.try_result;
> > +       if (exit_code == -EFAULT)
> > +               try_catch->catch(&try_catch->context);
> > +       else if (exit_code == -EINTR)
> > +               kunit_err(test, "wake_up_process() was never called.");
>
> Does kunit_err() add newlines? It would be good to stick to the rest of
> kernel style (printk, tracing, etc.) and rely on the callers to have the
> newlines they want. Also, remove the full-stop because it isn't really
> necessary to have those in error logs.

Will do.

>
> > +       else if (exit_code)
> > +               kunit_err(test, "Unknown error: %d", exit_code);
> > +}
> > +
> > +void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch)
> > +{
> > +       try_catch->run = kunit_generic_run_try_catch;
>
> Is the idea that 'run' would be anything besides
> 'kunit_generic_run_try_catch'? If it isn't going to be different, then

Yeah, it can be overridden with an architecture specific version.

> maybe it's simpler to just have a function like
> kunit_generic_run_try_catch() that is called by the unit tests instead
> of having to write 'try_catch->run(try_catch)' and indirect for the
> basic case. Maybe I've missed the point entirely though and this is all
> scaffolding for more complicated exception handling later on.

Yeah, the idea is that different architectures can override exception
handling with their own implementation. This is just the generic one.
For example, UML has one that doesn't depend on kthreads or
completions; the UML version also allows recovery from some segfault
conditions.

>
> > +       try_catch->throw = kunit_generic_throw;
> > +}
> > +
> > +void __weak kunit_try_catch_init(struct kunit_try_catch *try_catch)
> > +{
> > +       kunit_generic_try_catch_init(try_catch);
> > +}
> > +
> > +static void kunit_try_run_case(struct kunit_try_catch_context *context)
> > +{
> > +       struct kunit_try_catch_context *ctx = context;
> > +       struct kunit *test = ctx->test;
> > +       struct kunit_module *module = ctx->module;
> > +       struct kunit_case *test_case = ctx->test_case;
> > +
> > +       /*
> > +        * kunit_run_case_internal may encounter a fatal error; if it does, we
> > +        * will jump to ENTER_HANDLER above instead of continuing normal control
>
> Where is 'ENTER_HANDLER' above?

Whoops, sorry, that is left over from v3. Will remove.

>
> > +        * flow.
> > +        */
> >         kunit_run_case_internal(test, module, test_case);
> > +       /* This line may never be reached. */
> >         kunit_run_case_cleanup(test, module, test_case);
> > +}

Thanks!
Dan Carpenter Feb. 28, 2019, 1:54 p.m. UTC | #7
On Thu, Feb 28, 2019 at 01:03:24AM -0800, Brendan Higgins wrote:
> you could do:
> 
> if (IS_ERR_OR_NULL(ptr)) {
>         KUNIT_FAIL(test, "ptr is an errno or null: %ld", ptr);
>         return;
> }

It's best to not mix error pointers and NULL but when we do mix them,
it means that NULL is a special kind of success.  Like we try to load
a feature and we get back:

    valid pointer <-- success
    null          <-- feature is disabled.  not an error.
    error pointer <-- feature is broken.  fail.

regards,
dan carpenter
Brendan Higgins March 4, 2019, 10:28 p.m. UTC | #8
On Thu, Feb 28, 2019 at 5:55 AM Dan Carpenter <dan.carpenter@oracle.com> wrote:
>
> On Thu, Feb 28, 2019 at 01:03:24AM -0800, Brendan Higgins wrote:
> > you could do:
> >
> > if (IS_ERR_OR_NULL(ptr)) {
> >         KUNIT_FAIL(test, "ptr is an errno or null: %ld", ptr);
> >         return;
> > }
>
> It's best to not mix error pointers and NULL but when we do mix them,
> it means that NULL is a special kind of success.  Like we try to load
> a feature and we get back:
>
>     valid pointer <-- success
>     null          <-- feature is disabled.  not an error.
>     error pointer <-- feature is broken.  fail.

Thanks for pointing that out! Will fix.
Brendan Higgins March 4, 2019, 10:39 p.m. UTC | #9
On Thu, Feb 28, 2019 at 10:02 AM Stephen Boyd <sboyd@kernel.org> wrote:
>
> Quoting Brendan Higgins (2019-02-28 01:03:24)
> > On Tue, Feb 26, 2019 at 12:35 PM Stephen Boyd <sboyd@kernel.org> wrote:
> > >
> > > when they need to abort and then the test runner would detect that error
> > > via the return value from the 'run test' function. That would be a more
> > > direct approach, but also more verbose than a single KUNIT_ASSERT()
> > > line. It would be more kernel idiomatic too because the control flow
> >
> > Yeah, I was intentionally going against that idiom. I think that idiom
> > makes test logic more complicated than it needs to be, especially if
> > the assertion failure happens in a helper function; then you have to
> > pass that error all the way back up. It is important that test code
> > should be as simple as possible to the point of being immediately
> > obviously correct at first glance because there are no tests for
> > tests.
> >
> > The idea with assertions is that you use them to state all the
> > preconditions for your test. Logically speaking, these are the
> > premises of the test case, so if a premise isn't true, there is no
> > point in continuing the test case because there are no conclusions
> > that can be drawn without the premises. Whereas, the expectation is
> > the thing you are trying to prove. It is not used universally in
> > x-unit style test frameworks, but I really like it as a convention.
> > You could still express the idea of a premise using the above idiom,
> > but I think KUNIT_ASSERT_* states the intended idea perfectly.
>
> Fair enough. It would be great if these sorts of things were described
> in the commit text.

Good point. Will fix.

>
> Is the assumption that things like held locks and refcounted elements
> won't exist when one of these assertions is made? It sounds like some of
> the cleanup logic could be fairly complicated if a helper function
> changes some state and then an assert fails and we have to unwind all
> the state from a corrupt location. A similar problem exists for a test
> timeout too. How do we get back to a sane state if the test locks up for
> a long time? Just don't try?

It depends on the situation, if it is part of a KUnit test itself
(probably not code under test), then you can use the kunit_resource
API: https://lkml.org/lkml/2019/2/14/1125; it is inspired by the
devm_* family of functions, such that when a KUnit test case ends, for
any reason, all the kunit_resources are automatically cleaned up.
Similarly, kunit_module.exit is called at the end of every test case,
regardless of how it terminates.

>
> >
> > > isn't hidden inside a macro and it isn't intimately connected with
> > > kthreads and completions.
> >
> > Yeah, I wasn't a fan of that myself, but it was broadly available. My
> > previous version (still the architecture specific version for UML, not
> > in this patchset though) relies on UML_LONGJMP, but is obviously only
> > works on UML. A number of people wanted support for other
> > architectures. Rob and Luis specifically wanted me to provide a
> > version of abort that would work on any architecture, even if it only
> > had reduced functionality; I thought this fit the bill okay.
>
> Ok.
>
> >
> > >
> > > >
> > > > diff --git a/kunit/test.c b/kunit/test.c
> > > > index d18c50d5ed671..6e5244642ab07 100644
> > > > --- a/kunit/test.c
> > > > +++ b/kunit/test.c
> > > [...]
> > > > +
> > > > +static void kunit_generic_throw(struct kunit_try_catch *try_catch)
> > > > +{
> > > > +       try_catch->context.try_result = -EFAULT;
> > > > +       complete_and_exit(try_catch->context.try_completion, -EFAULT);
> > > > +}
> > > > +
> > > > +static int kunit_generic_run_threadfn_adapter(void *data)
> > > > +{
> > > > +       struct kunit_try_catch *try_catch = data;
> > > >
> > > > +       try_catch->try(&try_catch->context);
> > > > +
> > > > +       complete_and_exit(try_catch->context.try_completion, 0);
> > >
> > > The exit code doesn't matter, right? If so, it might be clearer to just
> > > return 0 from this function because kthreads exit themselves as far as I
> > > recall.
> >
> > You mean complete and then return?
>
> Yes. I was confused for a minute because I thought the exit code was
> checked, but it isn't. Instead, the try_catch->context.try_result is
> where the test result goes, so calling exit explicitly doesn't seem to
> be important here, but it is important in the throw case.

Yep.

>
> >
> > >
> > > > +       else if (exit_code)
> > > > +               kunit_err(test, "Unknown error: %d", exit_code);
> > > > +}
> > > > +
> > > > +void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch)
> > > > +{
> > > > +       try_catch->run = kunit_generic_run_try_catch;
> > >
> > > Is the idea that 'run' would be anything besides
> > > 'kunit_generic_run_try_catch'? If it isn't going to be different, then
> >
> > Yeah, it can be overridden with an architecture specific version.
> >
> > > maybe it's simpler to just have a function like
> > > kunit_generic_run_try_catch() that is called by the unit tests instead
> > > of having to write 'try_catch->run(try_catch)' and indirect for the
> > > basic case. Maybe I've missed the point entirely though and this is all
> > > scaffolding for more complicated exception handling later on.
> >
> > Yeah, the idea is that different architectures can override exception
> > handling with their own implementation. This is just the generic one.
> > For example, UML has one that doesn't depend on kthreads or
> > completions; the UML version also allows recovery from some segfault
> > conditions.
>
> Ok, got it. It may still be nice to have a wrapper or macro for that
> try_catch->run(try_catch) statement so we don't have to know that a
> try_catch struct has a run member.
>
>         static inline void kunit_run_try_catch(struct kunit_try_catch *try_catch)
>         {
>                 try_catch->run(try_catch);
>         }

Makes sense. Will fix in the next revision.
Frank Rowand March 22, 2019, 1:09 a.m. UTC | #10
On 2/27/19 11:42 PM, Brendan Higgins wrote:
> On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com> wrote:
>>
>> On 2/19/19 7:39 PM, Brendan Higgins wrote:
>>> On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com> wrote:
>>>>
>>>> On 2/14/19 1:37 PM, Brendan Higgins wrote:
>>>>> Add support for aborting/bailing out of test cases. Needed for
>>>>> implementing assertions.
>>>>>
>>>>> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
>>>>> ---
>>>>> Changes Since Last Version
>>>>>  - This patch is new introducing a new cross-architecture way to abort
>>>>>    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
>>>>>    details).
>>>>>  - On a side note, this is not a complete replacement for the UML abort
>>>>>    mechanism, but covers the majority of necessary functionality. UML
>>>>>    architecture specific featurs have been dropped from the initial
>>>>>    patchset.
>>>>> ---
>>>>>  include/kunit/test.h |  24 +++++
>>>>>  kunit/Makefile       |   3 +-
>>>>>  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
>>>>>  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
>>>>>  4 files changed, 353 insertions(+), 9 deletions(-)
>>>>>  create mode 100644 kunit/test-test.c
>>>>
>>>> < snip >
>>>>
>>>>> diff --git a/kunit/test.c b/kunit/test.c
>>>>> index d18c50d5ed671..6e5244642ab07 100644
>>>>> --- a/kunit/test.c
>>>>> +++ b/kunit/test.c
>>>>> @@ -6,9 +6,9 @@
>>>>>   * Author: Brendan Higgins <brendanhiggins@google.com>
>>>>>   */
>>>>>
>>>>> -#include <linux/sched.h>
>>>>>  #include <linux/sched/debug.h>
>>>>> -#include <os.h>
>>>>> +#include <linux/completion.h>
>>>>> +#include <linux/kthread.h>
>>>>>  #include <kunit/test.h>
>>>>>
>>>>>  static bool kunit_get_success(struct kunit *test)
>>>>> @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
>>>>>       spin_unlock_irqrestore(&test->lock, flags);
>>>>>  }
>>>>>
>>>>> +static bool kunit_get_death_test(struct kunit *test)
>>>>> +{
>>>>> +     unsigned long flags;
>>>>> +     bool death_test;
>>>>> +
>>>>> +     spin_lock_irqsave(&test->lock, flags);
>>>>> +     death_test = test->death_test;
>>>>> +     spin_unlock_irqrestore(&test->lock, flags);
>>>>> +
>>>>> +     return death_test;
>>>>> +}
>>>>> +
>>>>> +static void kunit_set_death_test(struct kunit *test, bool death_test)
>>>>> +{
>>>>> +     unsigned long flags;
>>>>> +
>>>>> +     spin_lock_irqsave(&test->lock, flags);
>>>>> +     test->death_test = death_test;
>>>>> +     spin_unlock_irqrestore(&test->lock, flags);
>>>>> +}
>>>>> +
>>>>>  static int kunit_vprintk_emit(const struct kunit *test,
>>>>>                             int level,
>>>>>                             const char *fmt,
>>>>> @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
>>>>>       stream->commit(stream);
>>>>>  }
>>>>>
>>>>> +static void __noreturn kunit_abort(struct kunit *test)
>>>>> +{
>>>>> +     kunit_set_death_test(test, true);
>>>>> +
>>>>> +     test->try_catch.throw(&test->try_catch);
>>>>> +
>>>>> +     /*
>>>>> +      * Throw could not abort from test.
>>>>> +      */
>>>>> +     kunit_err(test, "Throw could not abort from test!");
>>>>> +     show_stack(NULL, NULL);
>>>>> +     BUG();
>>>>
>>>> kunit_abort() is what will be call as the result of an assert failure.
>>>
>>> Yep. Does that need clarified somewhere.
>>>>
>>>> BUG(), which is a panic, which is crashing the system is not acceptable
>>>> in the Linux kernel.  You will just annoy Linus if you submit this.
>>>
>>> Sorry, I thought this was an acceptable use case since, a) this should
>>> never be compiled in a production kernel, b) we are in a pretty bad,
>>> unpredictable state if we get here and keep going. I think you might
>>> have said elsewhere that you think "a" is not valid? In any case, I
>>> can replace this with a WARN, would that be acceptable?
>>
>> A WARN may or may not make sense, depending on the context.  It may
>> be sufficient to simply report a test failure (as in the old version
>> of case (2) below.
>>
>> Answers to "a)" and "b)":
>>
>> a) it might be in a production kernel
> 
> Sorry for a possibly stupid question, how might it be so? Why would
> someone intentionally build unit tests into a production kernel?

People do things.  Just expect it.


>>
>> a') it is not acceptable in my development kernel either
> 
> Fair enough.
> 
>>
>> b) No.  You don't crash a developer's kernel either unless it is
>> required to avoid data corruption.
> 
> Alright, I thought that was one of those cases, but I am not going to
> push the point. Also, in case it wasn't clear, the path where BUG()
> gets called only happens if there is a bug in KUnit itself, not just
> because a test case fails catastrophically.

Still not out of the woods.  Still facing Lions and Tigers and Bears,
Oh my!

So kunit_abort() is normally called as the result of an assert
failure (as written many lines further above).

kunit_abort()
   test->try_catch.throw(&test->try_catch)
   // this is really kunit_generic_throw(), yes?
      complete_and_exit()
         if (comp)
            // comp is test_case_completion?
            complete(comp)
         do_exit()
            // void __noreturn do_exit(long code)
            // depending on the task, either panic
            // or the task dies

I did not read through enough of the code to understand what is going
on here.  Is each kunit_module executed in a newly created thread?
And if kunit_abort() is called then that thread dies?  Or something
else?


>>
>> b') And you can not do replacements like:
>>
>> (1) in of_unittest_check_tree_linkage()
>>
>> -----  old  -----
>>
>>         if (!of_root)
>>                 return;
>>
>> -----  new  -----
>>
>>         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, of_root);
>>
>> (2) in of_unittest_property_string()
>>
>> -----  old  -----
>>
>>         /* of_property_read_string_index() tests */
>>         rc = of_property_read_string_index(np, "string-property", 0, strings);
>>         unittest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);
>>
>> -----  new  -----
>>
>>         /* of_property_read_string_index() tests */
>>         rc = of_property_read_string_index(np, "string-property", 0, strings);
>>         KUNIT_ASSERT_EQ(test, rc, 0);
>>         KUNIT_EXPECT_STREQ(test, strings[0], "foobar");
>>
>>
>> If a test fails, that is no reason to abort testing.  The remainder of the unit
>> tests can still run.  There may be cascading failures, but that is ok.
> 
> Sure, that's what I am trying to do. I don't see how (1) changes
> anything, a failed KUNIT_ASSERT_* only bails on the current test case,
> it does not quit the entire test suite let alone crash the kernel.

This may be another case of whether a kunit_module is approximately a
single KUNIT_EXPECT_*() or a larger number of them.

I still want, for example, of_unittest_property_string() to include a large
number of KUNIT_EXPECT_*() instances.  In that case I still want the rest of
the tests in the kunit_module to be executed even after a KUNIT_ASSERT_*()
fails.  The existing test code has that property.


> 
> In case it wasn't clear above,
>>>>> +     test->try_catch.throw(&test->try_catch);
> should never, ever return. The only time it would, would be if KUnit
> was very broken. This should never actually happen, even if the
> assertion that called it was violated. KUNIT_ASSERT_* just says, "this
> is a prerequisite property for this test case"; if it is violated, the
> test case should fail and bail because the preconditions for the test
> case cannot be satisfied. Nevertheless, other tests cases will still
> run.
>
Brendan Higgins March 22, 2019, 1:41 a.m. UTC | #11
On Thu, Mar 21, 2019 at 6:10 PM Frank Rowand <frowand.list@gmail.com> wrote:
>
> On 2/27/19 11:42 PM, Brendan Higgins wrote:
> > On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com> wrote:
> >>
> >> On 2/19/19 7:39 PM, Brendan Higgins wrote:
> >>> On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com> wrote:
> >>>>
> >>>> On 2/14/19 1:37 PM, Brendan Higgins wrote:
> >>>>> Add support for aborting/bailing out of test cases. Needed for
> >>>>> implementing assertions.
> >>>>>
> >>>>> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> >>>>> ---
> >>>>> Changes Since Last Version
> >>>>>  - This patch is new introducing a new cross-architecture way to abort
> >>>>>    out of a test case (needed for KUNIT_ASSERT_*, see next patch for
> >>>>>    details).
> >>>>>  - On a side note, this is not a complete replacement for the UML abort
> >>>>>    mechanism, but covers the majority of necessary functionality. UML
> >>>>>    architecture specific featurs have been dropped from the initial
> >>>>>    patchset.
> >>>>> ---
> >>>>>  include/kunit/test.h |  24 +++++
> >>>>>  kunit/Makefile       |   3 +-
> >>>>>  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
> >>>>>  kunit/test.c         | 208 +++++++++++++++++++++++++++++++++++++++++--
> >>>>>  4 files changed, 353 insertions(+), 9 deletions(-)
> >>>>>  create mode 100644 kunit/test-test.c
> >>>>
> >>>> < snip >
> >>>>
> >>>>> diff --git a/kunit/test.c b/kunit/test.c
> >>>>> index d18c50d5ed671..6e5244642ab07 100644
> >>>>> --- a/kunit/test.c
> >>>>> +++ b/kunit/test.c
> >>>>> @@ -6,9 +6,9 @@
> >>>>>   * Author: Brendan Higgins <brendanhiggins@google.com>
> >>>>>   */
> >>>>>
> >>>>> -#include <linux/sched.h>
> >>>>>  #include <linux/sched/debug.h>
> >>>>> -#include <os.h>
> >>>>> +#include <linux/completion.h>
> >>>>> +#include <linux/kthread.h>
> >>>>>  #include <kunit/test.h>
> >>>>>
> >>>>>  static bool kunit_get_success(struct kunit *test)
> >>>>> @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit *test, bool success)
> >>>>>       spin_unlock_irqrestore(&test->lock, flags);
> >>>>>  }
> >>>>>
> >>>>> +static bool kunit_get_death_test(struct kunit *test)
> >>>>> +{
> >>>>> +     unsigned long flags;
> >>>>> +     bool death_test;
> >>>>> +
> >>>>> +     spin_lock_irqsave(&test->lock, flags);
> >>>>> +     death_test = test->death_test;
> >>>>> +     spin_unlock_irqrestore(&test->lock, flags);
> >>>>> +
> >>>>> +     return death_test;
> >>>>> +}
> >>>>> +
> >>>>> +static void kunit_set_death_test(struct kunit *test, bool death_test)
> >>>>> +{
> >>>>> +     unsigned long flags;
> >>>>> +
> >>>>> +     spin_lock_irqsave(&test->lock, flags);
> >>>>> +     test->death_test = death_test;
> >>>>> +     spin_unlock_irqrestore(&test->lock, flags);
> >>>>> +}
> >>>>> +
> >>>>>  static int kunit_vprintk_emit(const struct kunit *test,
> >>>>>                             int level,
> >>>>>                             const char *fmt,
> >>>>> @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
> >>>>>       stream->commit(stream);
> >>>>>  }
> >>>>>
> >>>>> +static void __noreturn kunit_abort(struct kunit *test)
> >>>>> +{
> >>>>> +     kunit_set_death_test(test, true);
> >>>>> +
> >>>>> +     test->try_catch.throw(&test->try_catch);
> >>>>> +
> >>>>> +     /*
> >>>>> +      * Throw could not abort from test.
> >>>>> +      */
> >>>>> +     kunit_err(test, "Throw could not abort from test!");
> >>>>> +     show_stack(NULL, NULL);
> >>>>> +     BUG();
> >>>>
> >>>> kunit_abort() is what will be call as the result of an assert failure.
> >>>
> >>> Yep. Does that need clarified somewhere.
> >>>>
> >>>> BUG(), which is a panic, which is crashing the system is not acceptable
> >>>> in the Linux kernel.  You will just annoy Linus if you submit this.
> >>>
> >>> Sorry, I thought this was an acceptable use case since, a) this should
> >>> never be compiled in a production kernel, b) we are in a pretty bad,
> >>> unpredictable state if we get here and keep going. I think you might
> >>> have said elsewhere that you think "a" is not valid? In any case, I
> >>> can replace this with a WARN, would that be acceptable?
> >>
> >> A WARN may or may not make sense, depending on the context.  It may
> >> be sufficient to simply report a test failure (as in the old version
> >> of case (2) below.
> >>
> >> Answers to "a)" and "b)":
> >>
> >> a) it might be in a production kernel
> >
> > Sorry for a possibly stupid question, how might it be so? Why would
> > someone intentionally build unit tests into a production kernel?
>
> People do things.  Just expect it.

Huh, alright. I will take your word for it then.

>
> >>
> >> a') it is not acceptable in my development kernel either
> >
> > Fair enough.
> >
> >>
> >> b) No.  You don't crash a developer's kernel either unless it is
> >> required to avoid data corruption.
> >
> > Alright, I thought that was one of those cases, but I am not going to
> > push the point. Also, in case it wasn't clear, the path where BUG()
> > gets called only happens if there is a bug in KUnit itself, not just
> > because a test case fails catastrophically.
>
> Still not out of the woods.  Still facing Lions and Tigers and Bears,
> Oh my!

Nope, I guess not :-)

>
> So kunit_abort() is normally called as the result of an assert
> failure (as written many lines further above).
>
> kunit_abort()
>    test->try_catch.throw(&test->try_catch)
>    // this is really kunit_generic_throw(), yes?
>       complete_and_exit()
>          if (comp)
>             // comp is test_case_completion?
>             complete(comp)
>          do_exit()
>             // void __noreturn do_exit(long code)
>             // depending on the task, either panic
>             // or the task dies

You are right up until after it calls do_exit().

KUnit actually spawns a thread for the test case to run in so that
when exit is called, only the test case thread dies. The thread that
started KUnit is never affected.

>
> I did not read through enough of the code to understand what is going
> on here.  Is each kunit_module executed in a newly created thread?
> And if kunit_abort() is called then that thread dies?  Or something
> else?

Mostly right, each kunit_case (not kunit_module) gets executed in its
own newly created thread. If kunit_abort() is called in that thread,
the kunit_case thread dies. The parent thread keeps going, and other
test cases are executed.

>
>
> >>
> >> b') And you can not do replacements like:
> >>
> >> (1) in of_unittest_check_tree_linkage()
> >>
> >> -----  old  -----
> >>
> >>         if (!of_root)
> >>                 return;
> >>
> >> -----  new  -----
> >>
> >>         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, of_root);
> >>
> >> (2) in of_unittest_property_string()
> >>
> >> -----  old  -----
> >>
> >>         /* of_property_read_string_index() tests */
> >>         rc = of_property_read_string_index(np, "string-property", 0, strings);
> >>         unittest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);
> >>
> >> -----  new  -----
> >>
> >>         /* of_property_read_string_index() tests */
> >>         rc = of_property_read_string_index(np, "string-property", 0, strings);
> >>         KUNIT_ASSERT_EQ(test, rc, 0);
> >>         KUNIT_EXPECT_STREQ(test, strings[0], "foobar");
> >>
> >>
> >> If a test fails, that is no reason to abort testing.  The remainder of the unit
> >> tests can still run.  There may be cascading failures, but that is ok.
> >
> > Sure, that's what I am trying to do. I don't see how (1) changes
> > anything, a failed KUNIT_ASSERT_* only bails on the current test case,
> > it does not quit the entire test suite let alone crash the kernel.
>
> This may be another case of whether a kunit_module is approximately a
> single KUNIT_EXPECT_*() or a larger number of them.
>
> I still want, for example, of_unittest_property_string() to include a large
> number of KUNIT_EXPECT_*() instances.  In that case I still want the rest of
> the tests in the kunit_module to be executed even after a KUNIT_ASSERT_*()
> fails.  The existing test code has that property.

Sure, in the context of the reply you just sent me on the DT unittest
thread, that makes sense. I can pull out all but the ones that would
have terminated the collection of test cases (where you return early),
if that makes it better.
Knut Omang March 22, 2019, 7:10 a.m. UTC | #12
On Thu, 2019-03-21 at 18:41 -0700, Brendan Higgins wrote:
> On Thu, Mar 21, 2019 at 6:10 PM Frank Rowand <frowand.list@gmail.com> wrote:
> > On 2/27/19 11:42 PM, Brendan Higgins wrote:
> > > On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com>
> > > wrote:
> > > > On 2/19/19 7:39 PM, Brendan Higgins wrote:
> > > > > On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com>
> > > > > wrote:
> > > > > > On 2/14/19 1:37 PM, Brendan Higgins wrote:
> > > > > > > Add support for aborting/bailing out of test cases. Needed for
> > > > > > > implementing assertions.
> > > > > > > 
> > > > > > > Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> > > > > > > ---
> > > > > > > Changes Since Last Version
> > > > > > >  - This patch is new introducing a new cross-architecture way to
> > > > > > > abort
> > > > > > >    out of a test case (needed for KUNIT_ASSERT_*, see next patch
> > > > > > > for
> > > > > > >    details).
> > > > > > >  - On a side note, this is not a complete replacement for the UML
> > > > > > > abort
> > > > > > >    mechanism, but covers the majority of necessary functionality.
> > > > > > > UML
> > > > > > >    architecture specific featurs have been dropped from the
> > > > > > > initial
> > > > > > >    patchset.
> > > > > > > ---
> > > > > > >  include/kunit/test.h |  24 +++++
> > > > > > >  kunit/Makefile       |   3 +-
> > > > > > >  kunit/test-test.c    | 127 ++++++++++++++++++++++++++
> > > > > > >  kunit/test.c         | 208
> > > > > > > +++++++++++++++++++++++++++++++++++++++++--
> > > > > > >  4 files changed, 353 insertions(+), 9 deletions(-)
> > > > > > >  create mode 100644 kunit/test-test.c
> > > > > > 
> > > > > > < snip >
> > > > > > 
> > > > > > > diff --git a/kunit/test.c b/kunit/test.c
> > > > > > > index d18c50d5ed671..6e5244642ab07 100644
> > > > > > > --- a/kunit/test.c
> > > > > > > +++ b/kunit/test.c
> > > > > > > @@ -6,9 +6,9 @@
> > > > > > >   * Author: Brendan Higgins <brendanhiggins@google.com>
> > > > > > >   */
> > > > > > > 
> > > > > > > -#include <linux/sched.h>
> > > > > > >  #include <linux/sched/debug.h>
> > > > > > > -#include <os.h>
> > > > > > > +#include <linux/completion.h>
> > > > > > > +#include <linux/kthread.h>
> > > > > > >  #include <kunit/test.h>
> > > > > > > 
> > > > > > >  static bool kunit_get_success(struct kunit *test)
> > > > > > > @@ -32,6 +32,27 @@ static void kunit_set_success(struct kunit
> > > > > > > *test, bool success)
> > > > > > >       spin_unlock_irqrestore(&test->lock, flags);
> > > > > > >  }
> > > > > > > 
> > > > > > > +static bool kunit_get_death_test(struct kunit *test)
> > > > > > > +{
> > > > > > > +     unsigned long flags;
> > > > > > > +     bool death_test;
> > > > > > > +
> > > > > > > +     spin_lock_irqsave(&test->lock, flags);
> > > > > > > +     death_test = test->death_test;
> > > > > > > +     spin_unlock_irqrestore(&test->lock, flags);
> > > > > > > +
> > > > > > > +     return death_test;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void kunit_set_death_test(struct kunit *test, bool
> > > > > > > death_test)
> > > > > > > +{
> > > > > > > +     unsigned long flags;
> > > > > > > +
> > > > > > > +     spin_lock_irqsave(&test->lock, flags);
> > > > > > > +     test->death_test = death_test;
> > > > > > > +     spin_unlock_irqrestore(&test->lock, flags);
> > > > > > > +}
> > > > > > > +
> > > > > > >  static int kunit_vprintk_emit(const struct kunit *test,
> > > > > > >                             int level,
> > > > > > >                             const char *fmt,
> > > > > > > @@ -70,13 +91,29 @@ static void kunit_fail(struct kunit *test,
> > > > > > > struct kunit_stream *stream)
> > > > > > >       stream->commit(stream);
> > > > > > >  }
> > > > > > > 
> > > > > > > +static void __noreturn kunit_abort(struct kunit *test)
> > > > > > > +{
> > > > > > > +     kunit_set_death_test(test, true);
> > > > > > > +
> > > > > > > +     test->try_catch.throw(&test->try_catch);
> > > > > > > +
> > > > > > > +     /*
> > > > > > > +      * Throw could not abort from test.
> > > > > > > +      */
> > > > > > > +     kunit_err(test, "Throw could not abort from test!");
> > > > > > > +     show_stack(NULL, NULL);
> > > > > > > +     BUG();
> > > > > > 
> > > > > > kunit_abort() is what will be call as the result of an assert
> > > > > > failure.
> > > > > 
> > > > > Yep. Does that need clarified somewhere.
> > > > > > BUG(), which is a panic, which is crashing the system is not
> > > > > > acceptable
> > > > > > in the Linux kernel.  You will just annoy Linus if you submit this.
> > > > > 
> > > > > Sorry, I thought this was an acceptable use case since, a) this should
> > > > > never be compiled in a production kernel, b) we are in a pretty bad,
> > > > > unpredictable state if we get here and keep going. I think you might
> > > > > have said elsewhere that you think "a" is not valid? In any case, I
> > > > > can replace this with a WARN, would that be acceptable?
> > > > 
> > > > A WARN may or may not make sense, depending on the context.  It may
> > > > be sufficient to simply report a test failure (as in the old version
> > > > of case (2) below.
> > > > 
> > > > Answers to "a)" and "b)":
> > > > 
> > > > a) it might be in a production kernel
> > > 
> > > Sorry for a possibly stupid question, how might it be so? Why would
> > > someone intentionally build unit tests into a production kernel?
> > 
> > People do things.  Just expect it.
> 
> Huh, alright. I will take your word for it then.

I have a better explanation: Production kernels have bugs, unfortunately.
And sometimes those need to be investigated on systems than cannot be 
brought down or affected more than absolutely necessary, maybe via a third party
doing the execution. A light weight, precise test (well tested ahead :) ) might
be a way of proving or disproving assumptions that can lead to the development
and application of a fix. 

IMHO you're confusing "building into" with temporary applying, then removing
again - like the difference between running a local user space program vs
installing it under /usr and have it in everyone's PATH.

> > > > a') it is not acceptable in my development kernel either

I think one of the fundamental properties of a good test framework is that it
should not require changes to the code under test by itself.

Knut

> > > Fair enough.
> > > 
> > > > b) No.  You don't crash a developer's kernel either unless it is
> > > > required to avoid data corruption.
> > > Alright, I thought that was one of those cases, but I am not going to
> > > push the point. Also, in case it wasn't clear, the path where BUG()
> > > gets called only happens if there is a bug in KUnit itself, not just
> > > because a test case fails catastrophically.
> > 
> > Still not out of the woods.  Still facing Lions and Tigers and Bears,
> > Oh my!
> 
> Nope, I guess not :-)
> 
> > So kunit_abort() is normally called as the result of an assert
> > failure (as written many lines further above).
> > 
> > kunit_abort()
> >    test->try_catch.throw(&test->try_catch)
> >    // this is really kunit_generic_throw(), yes?
> >       complete_and_exit()
> >          if (comp)
> >             // comp is test_case_completion?
> >             complete(comp)
> >          do_exit()
> >             // void __noreturn do_exit(long code)
> >             // depending on the task, either panic
> >             // or the task dies
> 
> You are right up until after it calls do_exit().
> 
> KUnit actually spawns a thread for the test case to run in so that
> when exit is called, only the test case thread dies. The thread that
> started KUnit is never affected.
> 
> > I did not read through enough of the code to understand what is going
> > on here.  Is each kunit_module executed in a newly created thread?
> > And if kunit_abort() is called then that thread dies?  Or something
> > else?
> 
> Mostly right, each kunit_case (not kunit_module) gets executed in its
> own newly created thread. If kunit_abort() is called in that thread,
> the kunit_case thread dies. The parent thread keeps going, and other
> test cases are executed.
> 
> > 
> > > > b') And you can not do replacements like:
> > > > 
> > > > (1) in of_unittest_check_tree_linkage()
> > > > 
> > > > -----  old  -----
> > > > 
> > > >         if (!of_root)
> > > >                 return;
> > > > 
> > > > -----  new  -----
> > > > 
> > > >         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, of_root);
> > > > 
> > > > (2) in of_unittest_property_string()
> > > > 
> > > > -----  old  -----
> > > > 
> > > >         /* of_property_read_string_index() tests */
> > > >         rc = of_property_read_string_index(np, "string-property", 0,
> > > > strings);
> > > >         unittest(rc == 0 && !strcmp(strings[0], "foobar"),
> > > > "of_property_read_string_index() failure; rc=%i\n", rc);
> > > > 
> > > > -----  new  -----
> > > > 
> > > >         /* of_property_read_string_index() tests */
> > > >         rc = of_property_read_string_index(np, "string-property", 0,
> > > > strings);
> > > >         KUNIT_ASSERT_EQ(test, rc, 0);
> > > >         KUNIT_EXPECT_STREQ(test, strings[0], "foobar");
> > > > 
> > > > 
> > > > If a test fails, that is no reason to abort testing.  The remainder of
> > > > the unit
> > > > tests can still run.  There may be cascading failures, but that is ok.
> > > 
> > > Sure, that's what I am trying to do. I don't see how (1) changes
> > > anything, a failed KUNIT_ASSERT_* only bails on the current test case,
> > > it does not quit the entire test suite let alone crash the kernel.
> > 
> > This may be another case of whether a kunit_module is approximately a
> > single KUNIT_EXPECT_*() or a larger number of them.
> > 
> > I still want, for example, of_unittest_property_string() to include a large
> > number of KUNIT_EXPECT_*() instances.  In that case I still want the rest of
> > the tests in the kunit_module to be executed even after a KUNIT_ASSERT_*()
> > fails.  The existing test code has that property.
> 
> Sure, in the context of the reply you just sent me on the DT unittest
> thread, that makes sense. I can pull out all but the ones that would
> have terminated the collection of test cases (where you return early),
> if that makes it better.
Brendan Higgins March 25, 2019, 10:32 p.m. UTC | #13
On Fri, Mar 22, 2019 at 12:11 AM Knut Omang <knut.omang@oracle.com> wrote:
>
> On Thu, 2019-03-21 at 18:41 -0700, Brendan Higgins wrote:
> > On Thu, Mar 21, 2019 at 6:10 PM Frank Rowand <frowand.list@gmail.com> wrote:
> > > On 2/27/19 11:42 PM, Brendan Higgins wrote:
> > > > On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com>
> > > > wrote:
> > > > > On 2/19/19 7:39 PM, Brendan Higgins wrote:
> > > > > > On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com>
> > > > > > wrote:
> > > > > > > On 2/14/19 1:37 PM, Brendan Higgins wrote:
< snip >
> > > > > > > kunit_abort() is what will be call as the result of an assert
> > > > > > > failure.
> > > > > >
> > > > > > Yep. Does that need clarified somewhere.
> > > > > > > BUG(), which is a panic, which is crashing the system is not
> > > > > > > acceptable
> > > > > > > in the Linux kernel.  You will just annoy Linus if you submit this.
> > > > > >
> > > > > > Sorry, I thought this was an acceptable use case since, a) this should
> > > > > > never be compiled in a production kernel, b) we are in a pretty bad,
> > > > > > unpredictable state if we get here and keep going. I think you might
> > > > > > have said elsewhere that you think "a" is not valid? In any case, I
> > > > > > can replace this with a WARN, would that be acceptable?
> > > > >
> > > > > A WARN may or may not make sense, depending on the context.  It may
> > > > > be sufficient to simply report a test failure (as in the old version
> > > > > of case (2) below.
> > > > >
> > > > > Answers to "a)" and "b)":
> > > > >
> > > > > a) it might be in a production kernel
> > > >
> > > > Sorry for a possibly stupid question, how might it be so? Why would
> > > > someone intentionally build unit tests into a production kernel?
> > >
> > > People do things.  Just expect it.
> >
> > Huh, alright. I will take your word for it then.
>
> I have a better explanation: Production kernels have bugs, unfortunately.
> And sometimes those need to be investigated on systems than cannot be
> brought down or affected more than absolutely necessary, maybe via a third party
> doing the execution. A light weight, precise test (well tested ahead :) ) might
> be a way of proving or disproving assumptions that can lead to the development
> and application of a fix.

Sorry, you are not suggesting testing in production are you? To be
clear, I am not concerned about someone using testing, KUnit, or
whatever in a *production-like* environment: that's not what we are
talking about here. My assumption is that no one will deploy tests
into actual production.

>
> IMHO you're confusing "building into" with temporary applying, then removing
> again - like the difference between running a local user space program vs
> installing it under /usr and have it in everyone's PATH.

I don't really see the point of distinguishing between "building into"
and "temporary applying" in this case; that's part of my point. Maybe
it makes sense in whitebox end-to-end testing, but in the case of unit
testing, I don't think so.

>
> > > > > a') it is not acceptable in my development kernel either
>
> I think one of the fundamental properties of a good test framework is that it
> should not require changes to the code under test by itself.
>

Sure, but that has nothing to do with the environment the code/tests
are running in.

< snip >

Cheers
Knut Omang March 26, 2019, 7:44 a.m. UTC | #14
On Mon, 2019-03-25 at 15:32 -0700, Brendan Higgins wrote:
> On Fri, Mar 22, 2019 at 12:11 AM Knut Omang <knut.omang@oracle.com> wrote:
> > On Thu, 2019-03-21 at 18:41 -0700, Brendan Higgins wrote:
> > > On Thu, Mar 21, 2019 at 6:10 PM Frank Rowand <frowand.list@gmail.com> wrote:
> > > > On 2/27/19 11:42 PM, Brendan Higgins wrote:
> > > > > On Tue, Feb 19, 2019 at 10:44 PM Frank Rowand <frowand.list@gmail.com>
> > > > > wrote:
> > > > > > On 2/19/19 7:39 PM, Brendan Higgins wrote:
> > > > > > > On Mon, Feb 18, 2019 at 11:52 AM Frank Rowand <frowand.list@gmail.com>
> > > > > > > wrote:
> > > > > > > > On 2/14/19 1:37 PM, Brendan Higgins wrote:
> < snip >
> > > > > > > > kunit_abort() is what will be call as the result of an assert
> > > > > > > > failure.
> > > > > > > 
> > > > > > > Yep. Does that need clarified somewhere.
> > > > > > > > BUG(), which is a panic, which is crashing the system is not
> > > > > > > > acceptable
> > > > > > > > in the Linux kernel.  You will just annoy Linus if you submit this.
> > > > > > > 
> > > > > > > Sorry, I thought this was an acceptable use case since, a) this should
> > > > > > > never be compiled in a production kernel, b) we are in a pretty bad,
> > > > > > > unpredictable state if we get here and keep going. I think you might
> > > > > > > have said elsewhere that you think "a" is not valid? In any case, I
> > > > > > > can replace this with a WARN, would that be acceptable?
> > > > > > 
> > > > > > A WARN may or may not make sense, depending on the context.  It may
> > > > > > be sufficient to simply report a test failure (as in the old version
> > > > > > of case (2) below.
> > > > > > 
> > > > > > Answers to "a)" and "b)":
> > > > > > 
> > > > > > a) it might be in a production kernel
> > > > > 
> > > > > Sorry for a possibly stupid question, how might it be so? Why would
> > > > > someone intentionally build unit tests into a production kernel?
> > > > 
> > > > People do things.  Just expect it.
> > > 
> > > Huh, alright. I will take your word for it then.
> > 
> > I have a better explanation: Production kernels have bugs, unfortunately.
> > And sometimes those need to be investigated on systems than cannot be
> > brought down or affected more than absolutely necessary, maybe via a third party
> > doing the execution. A light weight, precise test (well tested ahead :) ) might
> > be a way of proving or disproving assumptions that can lead to the development
> > and application of a fix.
> 
> Sorry, you are not suggesting testing in production are you? To be
> clear, I am not concerned about someone using testing, KUnit, or
> whatever in a *production-like* environment: that's not what we are
> talking about here. My assumption is that no one will deploy tests
> into actual production.

And my take is that you should not make such assumptions.
Even the cost of bringing down a "production-like" environment can be
significant, and the test infrastructure shouldn't think of itself as 
important enough to justify doing such things.

> > IMHO you're confusing "building into" with temporary applying, then removing
> > again - like the difference between running a local user space program vs
> > installing it under /usr and have it in everyone's PATH.
> 
> I don't really see the point of distinguishing between "building into"
> and "temporary applying" in this case; that's part of my point. Maybe
> it makes sense in whitebox end-to-end testing, but in the case of unit
> testing, I don't think so.
> 
> > > > > > a') it is not acceptable in my development kernel either
> > 
> > I think one of the fundamental properties of a good test framework is that it
> > should not require changes to the code under test by itself.
> > 
> 
> Sure, but that has nothing to do with the environment the code/tests
> are running in.

Well, just that if the tests require a special environment to run, 
you limit the usability of the tests in detecting or ruling out real issues.

Thanks,
Knut

> 
> < snip >
> 
> Cheers
diff mbox series

Patch

diff --git a/include/kunit/test.h b/include/kunit/test.h
index a36ad1a502c66..cd02dca96eb61 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -151,6 +151,26 @@  struct kunit_module {
 	struct kunit_case *test_cases;
 };
 
+struct kunit_try_catch_context {
+	struct kunit *test;
+	struct kunit_module *module;
+	struct kunit_case *test_case;
+	struct completion *try_completion;
+	int try_result;
+};
+
+struct kunit_try_catch {
+	void (*run)(struct kunit_try_catch *try_catch);
+	void (*throw)(struct kunit_try_catch *try_catch);
+	struct kunit_try_catch_context context;
+	void (*try)(struct kunit_try_catch_context *context);
+	void (*catch)(struct kunit_try_catch_context *context);
+};
+
+void kunit_try_catch_init(struct kunit_try_catch *try_catch);
+
+void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch);
+
 /**
  * struct kunit - represents a running instance of a test.
  * @priv: for user to store arbitrary data. Commonly used to pass data created
@@ -166,13 +186,17 @@  struct kunit {
 
 	/* private: internal use only. */
 	const char *name; /* Read only after initialization! */
+	struct kunit_try_catch try_catch;
 	spinlock_t lock; /* Gaurds all mutable test state. */
 	bool success; /* Protected by lock. */
+	bool death_test; /* Protected by lock. */
 	struct list_head resources; /* Protected by lock. */
+	void (*set_death_test)(struct kunit *test, bool death_test);
 	void (*vprintk)(const struct kunit *test,
 			const char *level,
 			struct va_format *vaf);
 	void (*fail)(struct kunit *test, struct kunit_stream *stream);
+	void (*abort)(struct kunit *test);
 };
 
 int kunit_init_test(struct kunit *test, const char *name);
diff --git a/kunit/Makefile b/kunit/Makefile
index 60a9ea6cb4697..e4c300f67479a 100644
--- a/kunit/Makefile
+++ b/kunit/Makefile
@@ -2,6 +2,7 @@  obj-$(CONFIG_KUNIT) +=			test.o \
 					string-stream.o \
 					kunit-stream.o
 
-obj-$(CONFIG_KUNIT_TEST) +=		string-stream-test.o
+obj-$(CONFIG_KUNIT_TEST) +=		test-test.o \
+					string-stream-test.o
 
 obj-$(CONFIG_KUNIT_EXAMPLE_TEST) +=	example-test.o
diff --git a/kunit/test-test.c b/kunit/test-test.c
new file mode 100644
index 0000000000000..a936c041f1c8f
--- /dev/null
+++ b/kunit/test-test.c
@@ -0,0 +1,127 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for core test infrastructure.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+#include <kunit/test.h>
+
+struct kunit_try_catch_test_context {
+	struct kunit_try_catch *try_catch;
+	bool function_called;
+};
+
+void kunit_test_successful_try(struct kunit_try_catch_context *context)
+{
+	struct kunit_try_catch_test_context *ctx = context->test->priv;
+
+	ctx->function_called = true;
+}
+
+void kunit_test_no_catch(struct kunit_try_catch_context *context)
+{
+	KUNIT_FAIL(context->test, "Catch should not be called.");
+}
+
+static void kunit_test_try_catch_successful_try_no_catch(struct kunit *test)
+{
+	struct kunit_try_catch_test_context *ctx = test->priv;
+	struct kunit_try_catch *try_catch = ctx->try_catch;
+
+	try_catch->try = kunit_test_successful_try;
+	try_catch->catch = kunit_test_no_catch;
+	try_catch->run(try_catch);
+
+	KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+void kunit_test_unsuccessful_try(struct kunit_try_catch_context *context)
+{
+	struct kunit_try_catch *try_catch = container_of(context,
+							 struct kunit_try_catch,
+							 context);
+
+	try_catch->throw(try_catch);
+	KUNIT_FAIL(context->test, "This line should never be reached.");
+}
+
+void kunit_test_catch(struct kunit_try_catch_context *context)
+{
+	struct kunit_try_catch_test_context *ctx = context->test->priv;
+
+	ctx->function_called = true;
+}
+
+static void kunit_test_try_catch_unsuccessful_try_does_catch(struct kunit *test)
+{
+	struct kunit_try_catch_test_context *ctx = test->priv;
+	struct kunit_try_catch *try_catch = ctx->try_catch;
+
+	try_catch->try = kunit_test_unsuccessful_try;
+	try_catch->catch = kunit_test_catch;
+	try_catch->run(try_catch);
+
+	KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+static void kunit_test_generic_try_catch_successful_try_no_catch(
+		struct kunit *test)
+{
+	struct kunit_try_catch_test_context *ctx = test->priv;
+	struct kunit_try_catch *try_catch = ctx->try_catch;
+
+	kunit_generic_try_catch_init(try_catch);
+
+	try_catch->try = kunit_test_successful_try;
+	try_catch->catch = kunit_test_no_catch;
+	try_catch->run(try_catch);
+
+	KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+static void kunit_test_generic_try_catch_unsuccessful_try_does_catch(
+		struct kunit *test)
+{
+	struct kunit_try_catch_test_context *ctx = test->priv;
+	struct kunit_try_catch *try_catch = ctx->try_catch;
+
+	kunit_generic_try_catch_init(try_catch);
+
+	try_catch->try = kunit_test_unsuccessful_try;
+	try_catch->catch = kunit_test_catch;
+	try_catch->run(try_catch);
+
+	KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+static int kunit_try_catch_test_init(struct kunit *test)
+{
+	struct kunit_try_catch_test_context *ctx;
+
+	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+	test->priv = ctx;
+
+	ctx->try_catch = kunit_kmalloc(test,
+				       sizeof(*ctx->try_catch),
+				       GFP_KERNEL);
+	kunit_try_catch_init(ctx->try_catch);
+	ctx->try_catch->context.test = test;
+
+	return 0;
+}
+
+static struct kunit_case kunit_try_catch_test_cases[] = {
+	KUNIT_CASE(kunit_test_try_catch_successful_try_no_catch),
+	KUNIT_CASE(kunit_test_try_catch_unsuccessful_try_does_catch),
+	KUNIT_CASE(kunit_test_generic_try_catch_successful_try_no_catch),
+	KUNIT_CASE(kunit_test_generic_try_catch_unsuccessful_try_does_catch),
+	{},
+};
+
+static struct kunit_module kunit_try_catch_test_module = {
+	.name = "kunit-try-catch-test",
+	.init = kunit_try_catch_test_init,
+	.test_cases = kunit_try_catch_test_cases,
+};
+module_test(kunit_try_catch_test_module);
diff --git a/kunit/test.c b/kunit/test.c
index d18c50d5ed671..6e5244642ab07 100644
--- a/kunit/test.c
+++ b/kunit/test.c
@@ -6,9 +6,9 @@ 
  * Author: Brendan Higgins <brendanhiggins@google.com>
  */
 
-#include <linux/sched.h>
 #include <linux/sched/debug.h>
-#include <os.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
 #include <kunit/test.h>
 
 static bool kunit_get_success(struct kunit *test)
@@ -32,6 +32,27 @@  static void kunit_set_success(struct kunit *test, bool success)
 	spin_unlock_irqrestore(&test->lock, flags);
 }
 
+static bool kunit_get_death_test(struct kunit *test)
+{
+	unsigned long flags;
+	bool death_test;
+
+	spin_lock_irqsave(&test->lock, flags);
+	death_test = test->death_test;
+	spin_unlock_irqrestore(&test->lock, flags);
+
+	return death_test;
+}
+
+static void kunit_set_death_test(struct kunit *test, bool death_test)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&test->lock, flags);
+	test->death_test = death_test;
+	spin_unlock_irqrestore(&test->lock, flags);
+}
+
 static int kunit_vprintk_emit(const struct kunit *test,
 			      int level,
 			      const char *fmt,
@@ -70,13 +91,29 @@  static void kunit_fail(struct kunit *test, struct kunit_stream *stream)
 	stream->commit(stream);
 }
 
+static void __noreturn kunit_abort(struct kunit *test)
+{
+	kunit_set_death_test(test, true);
+
+	test->try_catch.throw(&test->try_catch);
+
+	/*
+	 * Throw could not abort from test.
+	 */
+	kunit_err(test, "Throw could not abort from test!");
+	show_stack(NULL, NULL);
+	BUG();
+}
+
 int kunit_init_test(struct kunit *test, const char *name)
 {
 	spin_lock_init(&test->lock);
 	INIT_LIST_HEAD(&test->resources);
 	test->name = name;
+	test->set_death_test = kunit_set_death_test;
 	test->vprintk = kunit_vprintk;
 	test->fail = kunit_fail;
+	test->abort = kunit_abort;
 
 	return 0;
 }
@@ -122,16 +159,171 @@  static void kunit_run_case_cleanup(struct kunit *test,
 }
 
 /*
- * Performs all logic to run a test case.
+ * Handles an unexpected crash in a test case.
  */
-static bool kunit_run_case(struct kunit *test,
-			   struct kunit_module *module,
-			   struct kunit_case *test_case)
+static void kunit_handle_test_crash(struct kunit *test,
+				   struct kunit_module *module,
+				   struct kunit_case *test_case)
 {
-	kunit_set_success(test, true);
+	kunit_err(test, "%s crashed", test_case->name);
+	/*
+	 * TODO(brendanhiggins@google.com): This prints the stack trace up
+	 * through this frame, not up to the frame that caused the crash.
+	 */
+	show_stack(NULL, NULL);
+
+	kunit_case_internal_cleanup(test);
+}
+
+static void kunit_generic_throw(struct kunit_try_catch *try_catch)
+{
+	try_catch->context.try_result = -EFAULT;
+	complete_and_exit(try_catch->context.try_completion, -EFAULT);
+}
+
+static int kunit_generic_run_threadfn_adapter(void *data)
+{
+	struct kunit_try_catch *try_catch = data;
 
+	try_catch->try(&try_catch->context);
+
+	complete_and_exit(try_catch->context.try_completion, 0);
+}
+
+static void kunit_generic_run_try_catch(struct kunit_try_catch *try_catch)
+{
+	struct task_struct *task_struct;
+	struct kunit *test = try_catch->context.test;
+	int exit_code, wake_result;
+	DECLARE_COMPLETION(test_case_completion);
+
+	try_catch->context.try_completion = &test_case_completion;
+	try_catch->context.try_result = 0;
+	task_struct = kthread_create(kunit_generic_run_threadfn_adapter,
+					     try_catch,
+					     "kunit_try_catch_thread");
+	if (IS_ERR_OR_NULL(task_struct)) {
+		try_catch->catch(&try_catch->context);
+		return;
+	}
+
+	wake_result = wake_up_process(task_struct);
+	if (wake_result != 0 && wake_result != 1) {
+		kunit_err(test, "task was not woken properly: %d", wake_result);
+		try_catch->catch(&try_catch->context);
+	}
+
+	/*
+	 * TODO(brendanhiggins@google.com): We should probably have some type of
+	 * timeout here. The only question is what that timeout value should be.
+	 *
+	 * The intention has always been, at some point, to be able to label
+	 * tests with some type of size bucket (unit/small, integration/medium,
+	 * large/system/end-to-end, etc), where each size bucket would get a
+	 * default timeout value kind of like what Bazel does:
+	 * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size
+	 * There is still some debate to be had on exactly how we do this. (For
+	 * one, we probably want to have some sort of test runner level
+	 * timeout.)
+	 *
+	 * For more background on this topic, see:
+	 * https://mike-bland.com/2011/11/01/small-medium-large.html
+	 */
+	wait_for_completion(&test_case_completion);
+
+	exit_code = try_catch->context.try_result;
+	if (exit_code == -EFAULT)
+		try_catch->catch(&try_catch->context);
+	else if (exit_code == -EINTR)
+		kunit_err(test, "wake_up_process() was never called.");
+	else if (exit_code)
+		kunit_err(test, "Unknown error: %d", exit_code);
+}
+
+void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch)
+{
+	try_catch->run = kunit_generic_run_try_catch;
+	try_catch->throw = kunit_generic_throw;
+}
+
+void __weak kunit_try_catch_init(struct kunit_try_catch *try_catch)
+{
+	kunit_generic_try_catch_init(try_catch);
+}
+
+static void kunit_try_run_case(struct kunit_try_catch_context *context)
+{
+	struct kunit_try_catch_context *ctx = context;
+	struct kunit *test = ctx->test;
+	struct kunit_module *module = ctx->module;
+	struct kunit_case *test_case = ctx->test_case;
+
+	/*
+	 * kunit_run_case_internal may encounter a fatal error; if it does, we
+	 * will jump to ENTER_HANDLER above instead of continuing normal control
+	 * flow.
+	 */
 	kunit_run_case_internal(test, module, test_case);
+	/* This line may never be reached. */
 	kunit_run_case_cleanup(test, module, test_case);
+}
+
+static void kunit_catch_run_case(struct kunit_try_catch_context *context)
+{
+	struct kunit_try_catch_context *ctx = context;
+	struct kunit *test = ctx->test;
+	struct kunit_module *module = ctx->module;
+	struct kunit_case *test_case = ctx->test_case;
+
+	if (kunit_get_death_test(test)) {
+		/*
+		 * EXPECTED DEATH: kunit_run_case_internal encountered
+		 * anticipated fatal error. Everything should be in a safe
+		 * state.
+		 */
+		kunit_run_case_cleanup(test, module, test_case);
+	} else {
+		/*
+		 * UNEXPECTED DEATH: kunit_run_case_internal encountered an
+		 * unanticipated fatal error. We have no idea what the state of
+		 * the test case is in.
+		 */
+		kunit_handle_test_crash(test, module, test_case);
+		kunit_set_success(test, false);
+	}
+}
+
+/*
+ * Performs all logic to run a test case. It also catches most errors that
+ * occurs in a test case and reports them as failures.
+ *
+ * XXX: THIS DOES NOT FOLLOW NORMAL CONTROL FLOW. READ CAREFULLY!!!
+ */
+static bool kunit_run_case_catch_errors(struct kunit *test,
+				       struct kunit_module *module,
+				       struct kunit_case *test_case)
+{
+	struct kunit_try_catch *try_catch = &test->try_catch;
+	struct kunit_try_catch_context *context = &try_catch->context;
+
+	kunit_try_catch_init(try_catch);
+
+	kunit_set_success(test, true);
+	kunit_set_death_test(test, false);
+
+	/*
+	 * ENTER HANDLER: If a failure occurs, we enter here.
+	 */
+	context->test = test;
+	context->module = module;
+	context->test_case = test_case;
+	try_catch->try = kunit_try_run_case;
+	try_catch->catch = kunit_catch_run_case;
+	try_catch->run(try_catch);
+	/*
+	 * EXIT HANDLER: test case has been run and all possible errors have
+	 * been handled.
+	 */
 
 	return kunit_get_success(test);
 }
@@ -148,7 +340,7 @@  int kunit_run_tests(struct kunit_module *module)
 		return ret;
 
 	for (test_case = module->test_cases; test_case->run_case; test_case++) {
-		success = kunit_run_case(&test, module, test_case);
+		success = kunit_run_case_catch_errors(&test, module, test_case);
 		if (!success)
 			all_passed = false;