[RFC,v3,11/19] kunit: add Python libraries for handling KUnit config and kernel

Message ID 20181128193636.254378-12-brendanhiggins@google.com (mailing list archive)
State New, archived
Series kunit: introduce KUnit, the Linux kernel unit testing framework

Commit Message

Brendan Higgins Nov. 28, 2018, 7:36 p.m. UTC
The ultimate goal is to create minimal isolated test binaries; in the
meantime we are using UML to provide the infrastructure to run tests, so
define an abstract way to configure and run tests that allows us to
change the context in which tests are built without affecting the user.
This also makes pretty, dynamic error reporting and a lot of other nice
features easier.

kunit_config.py:
  - parse .config and Kconfig files.

kunit_kernel.py: provides helper functions to:
  - configure the kernel using kunitconfig.
  - build the kernel with the appropriate configuration.
  - invoke the kernel and stream the output back.
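
Taken together, a minimal driver script (illustrative only, not part of
this patch) could use these helpers roughly as follows:

  import kunit_kernel

  tree = kunit_kernel.LinuxSourceTree()   # reads ./kunitconfig
  if tree.build_reconfig() and tree.build_um_kernel(jobs=8):
          for line in tree.run_kernel():  # streams UML console output
                  print(line)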

Signed-off-by: Felix Guo <felixguoxiuping@gmail.com>
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
---
 tools/testing/kunit/.gitignore      |   3 +
 tools/testing/kunit/kunit_config.py |  60 +++++++++++++
 tools/testing/kunit/kunit_kernel.py | 126 ++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+)
 create mode 100644 tools/testing/kunit/.gitignore
 create mode 100644 tools/testing/kunit/kunit_config.py
 create mode 100644 tools/testing/kunit/kunit_kernel.py

Comments

Kieran Bingham Nov. 29, 2018, 1:54 p.m. UTC | #1
Hi Brendan,

Thanks again for this series!

On 28/11/2018 19:36, Brendan Higgins wrote:
> The ultimate goal is to create minimal isolated test binaries; in the
> meantime we are using UML to provide the infrastructure to run tests, so
> define an abstract way to configure and run tests that allow us to
> change the context in which tests are built without affecting the user.
> This also makes pretty and dynamic error reporting, and a lot of other
> nice features easier.


I wonder if we could somehow generate a shared library object
'libkernel' or 'libumlinux' from a UM configured set of headers and
objects so that we could create binary targets directly ?


> kunit_config.py:
>   - parse .config and Kconfig files.
> 
> kunit_kernel.py: provides helper functions to:
>   - configure the kernel using kunitconfig.
>   - build the kernel with the appropriate configuration.
>   - provide function to invoke the kernel and stream the output back.
> 
> Signed-off-by: Felix Guo <felixguoxiuping@gmail.com>
> Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
> ---
>  tools/testing/kunit/.gitignore      |   3 +
>  tools/testing/kunit/kunit_config.py |  60 +++++++++++++
>  tools/testing/kunit/kunit_kernel.py | 126 ++++++++++++++++++++++++++++
>  3 files changed, 189 insertions(+)
>  create mode 100644 tools/testing/kunit/.gitignore
>  create mode 100644 tools/testing/kunit/kunit_config.py
>  create mode 100644 tools/testing/kunit/kunit_kernel.py
> 
> diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore
> new file mode 100644
> index 0000000000000..c791ff59a37a9
> --- /dev/null
> +++ b/tools/testing/kunit/.gitignore
> @@ -0,0 +1,3 @@
> +# Byte-compiled / optimized / DLL files
> +__pycache__/
> +*.py[cod]
> \ No newline at end of file
> diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
> new file mode 100644
> index 0000000000000..183bd5e758762
> --- /dev/null
> +++ b/tools/testing/kunit/kunit_config.py
> @@ -0,0 +1,60 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +import collections
> +import re
> +
> +CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$'
> +CONFIG_PATTERN = r'^CONFIG_\w+=\S+$'
> +
> +KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry'])
> +
> +
> +class KconfigEntry(KconfigEntryBase):
> +
> +	def __str__(self) -> str:
> +		return self.raw_entry
> +
> +
> +class KconfigParseError(Exception):
> +	"""Error parsing Kconfig defconfig or .config."""
> +
> +
> +class Kconfig(object):
> +	"""Represents defconfig or .config specified using the Kconfig language."""
> +
> +	def __init__(self):
> +		self._entries = []
> +
> +	def entries(self):
> +		return set(self._entries)
> +
> +	def add_entry(self, entry: KconfigEntry) -> None:
> +		self._entries.append(entry)
> +
> +	def is_subset_of(self, other: "Kconfig") -> bool:
> +		return self.entries().issubset(other.entries())
> +
> +	def write_to_file(self, path: str) -> None:
> +		with open(path, 'w') as f:
> +			for entry in self.entries():
> +				f.write(str(entry) + '\n')
> +
> +	def parse_from_string(self, blob: str) -> None:
> +		"""Parses a string containing KconfigEntrys and populates this Kconfig."""
> +		self._entries = []
> +		is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
> +		config_matcher = re.compile(CONFIG_PATTERN)
> +		for line in blob.split('\n'):
> +			line = line.strip()
> +			if not line:
> +				continue
> +			elif config_matcher.match(line) or is_not_set_matcher.match(line):
> +				self._entries.append(KconfigEntry(line))
> +			elif line[0] == '#':
> +				continue
> +			else:
> +				raise KconfigParseError('Failed to parse: ' + line)
> +
> +	def read_from_file(self, path: str) -> None:
> +		with open(path, 'r') as f:
> +			self.parse_from_string(f.read())
> diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
> new file mode 100644
> index 0000000000000..bba7ea7ca1869
> --- /dev/null
> +++ b/tools/testing/kunit/kunit_kernel.py
> @@ -0,0 +1,126 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +import logging
> +import subprocess
> +import os
> +
> +import kunit_config
> +
> +KCONFIG_PATH = '.config'
> +
> +class ConfigError(Exception):
> +	"""Represents an error trying to configure the Linux kernel."""
> +
> +
> +class BuildError(Exception):
> +	"""Represents an error trying to build the Linux kernel."""
> +
> +
> +class LinuxSourceTreeOperations(object):
> +	"""An abstraction over command line operations performed on a source tree."""
> +
> +	def make_mrproper(self):
> +		try:
> +			subprocess.check_output(['make', 'mrproper'])
> +		except OSError as e:
> +			raise ConfigError('Could not call make command: ' + str(e))
> +		except subprocess.CalledProcessError as e:
> +			raise ConfigError(e.output)
> +
> +	def make_olddefconfig(self):
> +		try:
> +			subprocess.check_output(['make', 'ARCH=um', 'olddefconfig'])
> +		except OSError as e:
> +			raise ConfigError('Could not call make command: ' + str(e))
> +		except subprocess.CalledProcessError as e:
> +			raise ConfigError(e.output)
> +
> +	def make(self, jobs):
> +		try:
> +			subprocess.check_output([
> +					'make',
> +					'ARCH=um',
> +					'--jobs=' + str(jobs)])

Perhaps as a future extension:

It would be nice if we could set an O= here to keep the source tree
pristine.

In fact I might even suggest that this should always be set so that
unit testing could live alongside an existing kernel build? :

 O ?= $KBUILD_SRC/
 O := $(O)/kunittest/$(ARCH)/build
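
A rough sketch of how the make() helper could honour an output directory
(build_dir is a hypothetical parameter, not part of this patch; O= is the
standard kbuild output-directory variable):

  def make(self, jobs, build_dir=None):
          # Hypothetical extension: pass O= so build artifacts land
          # outside the source tree.
          command = ['make', 'ARCH=um', '--jobs=' + str(jobs)]
          if build_dir:
                  command.append('O=' + build_dir)
          try:
                  subprocess.check_output(command)
          except OSError as e:
                  raise BuildError('Could not call make: ' + str(e))
          except subprocess.CalledProcessError as e:
                  raise BuildError(e.output)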


> +		except OSError as e:
> +			raise BuildError('Could not call make: ' + str(e))
> +		except subprocess.CalledProcessError as e:
> +			raise BuildError(e.output)
> +
> +	def linux_bin(self, params, timeout):
> +		"""Runs the Linux UML binary. Must be named 'linux'."""
> +		process = subprocess.Popen(
> +			['./linux'] + params,
> +			stdin=subprocess.PIPE,
> +			stdout=subprocess.PIPE,
> +			stderr=subprocess.PIPE)
> +		process.wait(timeout=timeout)
> +		return process
> +
> +
> +class LinuxSourceTree(object):
> +	"""Represents a Linux kernel source tree with KUnit tests."""
> +
> +	def __init__(self):
> +		self._kconfig = kunit_config.Kconfig()
> +		self._kconfig.read_from_file('kunitconfig')
> +		self._ops = LinuxSourceTreeOperations()
> +
> +	def clean(self):
> +		try:
> +			self._ops.make_mrproper()
> +		except ConfigError as e:
> +			logging.error(e)
> +			return False
> +		return True
> +
> +	def build_config(self):
> +		self._kconfig.write_to_file(KCONFIG_PATH)
> +		try:
> +			self._ops.make_olddefconfig()
> +		except ConfigError as e:
> +			logging.error(e)
> +			return False
> +		validated_kconfig = kunit_config.Kconfig()
> +		validated_kconfig.read_from_file(KCONFIG_PATH)
> +		if not self._kconfig.is_subset_of(validated_kconfig):
> +			logging.error('Provided Kconfig is not contained in validated .config!')
> +			return False
> +		return True
> +
> +	def build_reconfig(self):
> +		"""Creates a new .config if it is not a subset of the kunitconfig."""
> +		if os.path.exists(KCONFIG_PATH):
> +			existing_kconfig = kunit_config.Kconfig()
> +			existing_kconfig.read_from_file(KCONFIG_PATH)
> +			if not self._kconfig.is_subset_of(existing_kconfig):
> +				print('Regenerating .config ...')
> +				os.remove(KCONFIG_PATH)
> +				return self.build_config()
> +			else:
> +				return True
> +		else:
> +			print('Generating .config ...')
> +			return self.build_config()
> +
> +	def build_um_kernel(self, jobs):
> +		try:
> +			self._ops.make_olddefconfig()
> +			self._ops.make(jobs)
> +		except (ConfigError, BuildError) as e:
> +			logging.error(e)
> +			return False
> +		used_kconfig = kunit_config.Kconfig()
> +		used_kconfig.read_from_file(KCONFIG_PATH)
> +		if not self._kconfig.is_subset_of(used_kconfig):
> +			logging.error('Provided Kconfig is not contained in final config!')
> +			return False
> +		return True
> +
> +	def run_kernel(self, args=[]):
> +		timeout = None
> +		args.extend(['mem=256M'])
> +		process = self._ops.linux_bin(args, timeout)
> +		with open('test.log', 'w') as f:
> +			for line in process.stdout:
> +				f.write(line.rstrip().decode('ascii') + '\n')
> +				yield line.rstrip().decode('ascii')
>
Luis Chamberlain Nov. 30, 2018, 3:44 a.m. UTC | #2
On Wed, Nov 28, 2018 at 11:36:28AM -0800, Brendan Higgins wrote:
> The ultimate goal is to create minimal isolated test binaries; in the
> meantime we are using UML to provide the infrastructure to run tests, so
> define an abstract way to configure and run tests that allow us to
> change the context in which tests are built without affecting the user.
> This also makes pretty and dynamic error reporting, and a lot of other
> nice features easier.
> 
> kunit_config.py:
>   - parse .config and Kconfig files.
>
> 
> kunit_kernel.py: provides helper functions to:
>   - configure the kernel using kunitconfig.

We get the tools to run the config stuff, build, etc, but not a top
level 'make kunitconfig' or whatever. We have things like 'make
kvmconfig' and 'make xenconfig', I think it would be reasonable to
add similar for this.

  Luis
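
Until such a target exists, the Python tooling could approximate one by
merging a KUnit config fragment the way the kvmconfig/xenconfig targets
do, via scripts/kconfig/merge_config.sh. A sketch (the helper name is
made up; 'kunitconfig' is the fragment file this patch already reads):

  import subprocess

  def merge_kunit_fragment(fragment='kunitconfig'):
          # Merge the fragment into the current .config, then resolve
          # any remaining symbols.
          subprocess.check_output(
                  ['./scripts/kconfig/merge_config.sh', '-m', '.config', fragment])
          subprocess.check_output(['make', 'ARCH=um', 'olddefconfig'])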
Brendan Higgins Dec. 3, 2018, 11:48 p.m. UTC | #3
On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
<kieran.bingham@ideasonboard.com> wrote:
>
> Hi Brendan,
>
> Thanks again for this series!
>
> On 28/11/2018 19:36, Brendan Higgins wrote:
> > The ultimate goal is to create minimal isolated test binaries; in the
> > meantime we are using UML to provide the infrastructure to run tests, so
> > define an abstract way to configure and run tests that allow us to
> > change the context in which tests are built without affecting the user.
> > This also makes pretty and dynamic error reporting, and a lot of other
> > nice features easier.
>
>
> I wonder if we could somehow generate a shared library object
> 'libkernel' or 'libumlinux' from a UM configured set of headers and
> objects so that we could create binary targets directly ?

That's an interesting idea. I think it would be difficult to figure
out exactly where to draw the line of what goes in there and what
needs to be built specific to a test a priori. Of course, that leads
into the biggest problem in general, needed to know what I need to
build to test the thing that I want to test.

Nevertheless, I could definitely imagine that being useful in a lot of cases.

> > diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
> > new file mode 100644
> > index 0000000000000..bba7ea7ca1869
> > --- /dev/null
> > +++ b/tools/testing/kunit/kunit_kernel.py
...
> > +     def make(self, jobs):
> > +             try:
> > +                     subprocess.check_output([
> > +                                     'make',
> > +                                     'ARCH=um',
> > +                                     '--jobs=' + str(jobs)])
>
> Perhaps as a future extension:
>
> It would be nice if we could set an O= here to keep the source tree
> pristine.
>
> In fact I might even suggest that this should always be set so that the
> unittesting could live along side an existing kernel build? :
>
>  O ?= $KBUILD_SRC/
>  O := $(O)/kunittest/$(ARCH)/build

I agree with that. It would be pretty annoying to run a unit test and
have it mess up your .config and force you to rebuild everything else.
(I have actually done this to myself a couple of times...)

Cheers
Brendan Higgins Dec. 3, 2018, 11:50 p.m. UTC | #4
On Thu, Nov 29, 2018 at 7:44 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Wed, Nov 28, 2018 at 11:36:28AM -0800, Brendan Higgins wrote:
> > The ultimate goal is to create minimal isolated test binaries; in the
> > meantime we are using UML to provide the infrastructure to run tests, so
> > define an abstract way to configure and run tests that allow us to
> > change the context in which tests are built without affecting the user.
> > This also makes pretty and dynamic error reporting, and a lot of other
> > nice features easier.
> >
> > kunit_config.py:
> >   - parse .config and Kconfig files.
> >
> >
> > kunit_kernel.py: provides helper functions to:
> >   - configure the kernel using kunitconfig.
>
> We get the tools to run the config stuff, build, etc, but not a top
> level 'make kunitconfig' or whatever. We have things like 'make
> kvmconfig' and 'make xenconfig', I think it would be reasonable to
> add similar for this.

Are you just asking for a defconfig for KUnit, or are you asking for a
way to run KUnit from make?
Luis Chamberlain Dec. 4, 2018, 8:47 p.m. UTC | #5
On Mon, Dec 03, 2018 at 03:48:15PM -0800, Brendan Higgins wrote:
> On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
> <kieran.bingham@ideasonboard.com> wrote:
> >
> > Hi Brendan,
> >
> > Thanks again for this series!
> >
> > On 28/11/2018 19:36, Brendan Higgins wrote:
> > > The ultimate goal is to create minimal isolated test binaries; in the
> > > meantime we are using UML to provide the infrastructure to run tests, so
> > > define an abstract way to configure and run tests that allow us to
> > > change the context in which tests are built without affecting the user.
> > > This also makes pretty and dynamic error reporting, and a lot of other
> > > nice features easier.
> >
> >
> > I wonder if we could somehow generate a shared library object
> > 'libkernel' or 'libumlinux' from a UM configured set of headers and
> > objects so that we could create binary targets directly ?
> 
> That's an interesting idea. I think it would be difficult to figure
> out exactly where to draw the line of what goes in there and what
> needs to be built specific to a test a priori. Of course, that leads
> into the biggest problem in general, needed to know what I need to
> build to test the thing that I want to test.
> 
> Nevertheless, I could definitely imagine that being useful in a lot of cases.

Whether or not we can abstract away the kernel into such a mechanism
with uml libraries is a good question worth exploring.

Developers working upstream do modify their kernels a lot, so we'd have
to update such libraries quite a bit, but I think that's fine too. The
*real* value I think from the above suggestion would be enterprise /
mobile distros or stable kernel maintainers which have a static kernel
they need to support for a relatively *long time*, consider a 10 year
time frame. Running unit tests without qemu with uml and libraries for
respective kernels seems real worthy.

The overhead for testing a unit test for said targets, *ideally*, would
just be to reboot into the system with such libraries available, a
unit test would just look for the respective uname -r library and mimic
that kernel, much the same way enterprise distributions today rely on
having debugging symbols available to run against crash / gdb. Having
debug modules / kernel for crash requires such effort already, so this
would just be an extra layer of other prospect tests.

All ideaware for now, but the roadmap seems to be paving itself.

  Luis
Luis Chamberlain Dec. 4, 2018, 8:48 p.m. UTC | #6
On Mon, Dec 03, 2018 at 03:50:48PM -0800, Brendan Higgins wrote:
> On Thu, Nov 29, 2018 at 7:44 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Wed, Nov 28, 2018 at 11:36:28AM -0800, Brendan Higgins wrote:
> > > The ultimate goal is to create minimal isolated test binaries; in the
> > > meantime we are using UML to provide the infrastructure to run tests, so
> > > define an abstract way to configure and run tests that allow us to
> > > change the context in which tests are built without affecting the user.
> > > This also makes pretty and dynamic error reporting, and a lot of other
> > > nice features easier.
> > >
> > > kunit_config.py:
> > >   - parse .config and Kconfig files.
> > >
> > >
> > > kunit_kernel.py: provides helper functions to:
> > >   - configure the kernel using kunitconfig.
> >
> > We get the tools to run the config stuff, build, etc, but not a top
> > level 'make kunitconfig' or whatever. We have things like 'make
> > kvmconfig' and 'make xenconfig', I think it would be reasonable to
> > add similar for this.
> 
> Are you just asking for a defconfig for KUnit, or are you asking for a
> way to run KUnit from make?

At least the first. The latter seems intrusive as a top level Makefile
thing.

  Luis
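
For reference, the sort of fragment meant here is exactly what
kunit_config.py consumes; a minimal one could even be written out with
the Kconfig class from this patch (treat CONFIG_KUNIT as an assumed
option name from earlier in the series):

  import kunit_config

  kconfig = kunit_config.Kconfig()
  kconfig.add_entry(kunit_config.KconfigEntry('CONFIG_KUNIT=y'))
  kconfig.write_to_file('kunitconfig')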
Kieran Bingham Dec. 6, 2018, 12:32 p.m. UTC | #7
Hi Luis,

On 04/12/2018 20:47, Luis Chamberlain wrote:
> On Mon, Dec 03, 2018 at 03:48:15PM -0800, Brendan Higgins wrote:
>> On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
>> <kieran.bingham@ideasonboard.com> wrote:
>>>
>>> Hi Brendan,
>>>
>>> Thanks again for this series!
>>>
>>> On 28/11/2018 19:36, Brendan Higgins wrote:
>>>> The ultimate goal is to create minimal isolated test binaries; in the
>>>> meantime we are using UML to provide the infrastructure to run tests, so
>>>> define an abstract way to configure and run tests that allow us to
>>>> change the context in which tests are built without affecting the user.
>>>> This also makes pretty and dynamic error reporting, and a lot of other
>>>> nice features easier.
>>>
>>>
>>> I wonder if we could somehow generate a shared library object
>>> 'libkernel' or 'libumlinux' from a UM configured set of headers and
>>> objects so that we could create binary targets directly ?
>>
>> That's an interesting idea. I think it would be difficult to figure
>> out exactly where to draw the line of what goes in there and what
>> needs to be built specific to a test a priori. Of course, that leads
>> into the biggest problem in general, needed to know what I need to
>> build to test the thing that I want to test.
>>
>> Nevertheless, I could definitely imagine that being useful in a lot of cases.
> 
> Whether or not we can abstract away the kernel into such a mechanism
> with uml libraries is a good question worth exploring.
> 
> Developers working upstream do modify their kernels a lot, so we'd have
> to update such libraries quite a bit, but I think that's fine too. The
> *real* value I think from the above suggestion would be enterprise /
> mobile distros or stable kernel maintainers which have a static kernel
> they need to support for a relatively *long time*, consider a 10 year
> time frame. Running unit tests without qemu with uml and libraries for
> respective kernels seems real worthy.


I think any such library might be something generated by the kernel
build system, so if someone makes substantial changes to a core
component provided by the library - it can be up to them to build a
corresponding userspace library as well.

We could also consider only providing *static* libraries rather than
dynamic. So anyone building some userspace tool / test with this would
be required to compile against (the version of) the kernel they expect
perhaps... - much like we expect modules to be compiled currently.

And then the userspace binary would be sufficiently able to live its
life on its own :)


> The overhead for testing a unit test for said targets, *ideally*, would
> just be to to reboot into the system with such libraries available, a
> unit test would just look for the respective uname -r library and mimic
> that kernel, much the same way enterprise distributions today rely on
> having debugging symbols available to run against crash / gdb. Having
> debug modules / kernel for crash requires such effort already, so this
> would just be an extra layer of other prospect tests.

Oh - although, yes - there are some good concepts there - but I'm a bit
wary of how easy it would be to 'run' the said test against multiple
kernel version libraries... there would be a lot of possible ABI
conflicts perhaps.

My main initial idea for a libumlinux is to provide infrastructure such
as our linked-lists and other kernel formatting so that we can take
kernel code directly to userspace for test and debug (assuming that
there are no hardware dependencies or things that we can't mock out)


I think all of this could complement kunit of course - this isn't
suggesting an alternative implementation :-)


> All ideaware for now, but the roadmap seems to be paving itself.

I guess all great ideas start as ideaware somehow ...

Now we just have to start the race to see who can tweak the kernel build
system to produce an output library first :)

 (I won't be upset if I don't win the race)
Matthew Wilcox Dec. 6, 2018, 3:37 p.m. UTC | #8
On Thu, Dec 06, 2018 at 12:32:47PM +0000, Kieran Bingham wrote:
> On 04/12/2018 20:47, Luis Chamberlain wrote:
> > On Mon, Dec 03, 2018 at 03:48:15PM -0800, Brendan Higgins wrote:
> >> On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
> >> <kieran.bingham@ideasonboard.com> wrote:
> >>>
> >>> Hi Brendan,
> >>>
> >>> Thanks again for this series!
> >>>
> >>> On 28/11/2018 19:36, Brendan Higgins wrote:
> >>>> The ultimate goal is to create minimal isolated test binaries; in the
> >>>> meantime we are using UML to provide the infrastructure to run tests, so
> >>>> define an abstract way to configure and run tests that allow us to
> >>>> change the context in which tests are built without affecting the user.
> >>>> This also makes pretty and dynamic error reporting, and a lot of other
> >>>> nice features easier.
> >>>
> >>>
> >>> I wonder if we could somehow generate a shared library object
> >>> 'libkernel' or 'libumlinux' from a UM configured set of headers and
> >>> objects so that we could create binary targets directly ?
> >>
> >> That's an interesting idea. I think it would be difficult to figure
> >> out exactly where to draw the line of what goes in there and what
> >> needs to be built specific to a test a priori. Of course, that leads
> >> into the biggest problem in general, needed to know what I need to
> >> build to test the thing that I want to test.
> >>
> >> Nevertheless, I could definitely imagine that being useful in a lot of cases.
> > 
> > Whether or not we can abstract away the kernel into such a mechanism
> > with uml libraries is a good question worth exploring.
> > 
> > Developers working upstream do modify their kernels a lot, so we'd have
> > to update such libraries quite a bit, but I think that's fine too. The
> > *real* value I think from the above suggestion would be enterprise /
> > mobile distros or stable kernel maintainers which have a static kernel
> > they need to support for a relatively *long time*, consider a 10 year
> > time frame. Running unit tests without qemu with uml and libraries for
> > respective kernels seems real worthy.
> 
> I think any such library might be something generated by the kernel
> build system, so if someone makes substantial changes to a core
> component provided by the library - it can be up to them to build a
> corresponding userspace library as well.
> 
> We could also consider to only provide *static* libraries rather than
> dynamic. So any one building some userspace tool / test with this would
> be required to compile against (the version of) the kernel they expect
> perhaps... - much like we expect modules to be compiled currently.
> 
> And then the userspace binary would be sufficiently able to live it's
> life on it's own :)
> 
> > The overhead for testing a unit test for said targets, *ideally*, would
> > just be to to reboot into the system with such libraries available, a
> > unit test would just look for the respective uname -r library and mimic
> > that kernel, much the same way enterprise distributions today rely on
> > having debugging symbols available to run against crash / gdb. Having
> > debug modules / kernel for crash requires such effort already, so this
> > would just be an extra layer of other prospect tests.
> 
> Oh - although, yes - there are some good concepts there - but I'm a bit
> weary of how easy it would be to 'run' the said test against multiple
> kernel version libraries... there would be a lot of possible ABI
> conflicts perhaps.
> 
> My main initial idea for a libumlinux is to provide infrastructure such
> as our linked-lists and other kernel formatting so that we can take
> kernel code directly to userspace for test and debug (assuming that
> there are no hardware dependencies or things that we can't mock out)
> 
> I think all of this could complement kunit of course - this isn't
> suggesting an alternative implementation :-)

I suspect the reason Luis cc'd me on this is that we already have some
artisanally-crafted userspace kernel-mocking interfaces under tools/.
The tools/testing/radix-tree directory is the source of some of this,
but I've been moving pieces out into tools/ more generally where it
makes sense to.

We have liburcu already, which is good.  The main sticking points are:

 - No emulation of kernel thread interfaces
 - The kernel does not provide the ability to aggressively fail memory
   allocations (which is useful when trying to exercise the memory failure
   paths).
 - printk has started adding a lot of %pX enhancements which printf
   obviously doesn't know about.
 - No global pseudo-random number generator in the kernel.  Probably
   we should steal the i915 one.

I know Dan Williams has also done a lot of work mocking kernel
interfaces for libnvdimm.
Luis Chamberlain Dec. 7, 2018, 1:05 a.m. UTC | #9
On Thu, Dec 06, 2018 at 12:32:47PM +0000, Kieran Bingham wrote:
> My main initial idea for a libumlinux is to provide infrastructure such
> as our linked-lists and other kernel formatting so that we can take
> kernel code directly to userspace for test and debug (assuming that
> there are no hardware dependencies or things that we can't mock out)

The tools/ directory already does this for a tons of things. Its where
I ended up placing some API I tested a long time ago when I wanted to
test it in userspace, and provide the unit test in userspace (for my
linker table patches).

> Now we just have to start the race to see who can tweak the kernel build
> system to produce an output library first :)

Should be relatively easy if the tools directory is used. Yes, there is
an inherent risk of duplication, but that was decided long ago.

  Luis
Kieran Bingham Dec. 7, 2018, 11:30 a.m. UTC | #10
Hi Matthew,

On 06/12/2018 15:37, Matthew Wilcox wrote:
> On Thu, Dec 06, 2018 at 12:32:47PM +0000, Kieran Bingham wrote:
>> On 04/12/2018 20:47, Luis Chamberlain wrote:
>>> On Mon, Dec 03, 2018 at 03:48:15PM -0800, Brendan Higgins wrote:
>>>> On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
>>>> <kieran.bingham@ideasonboard.com> wrote:
>>>>>
>>>>> Hi Brendan,
>>>>>
>>>>> Thanks again for this series!
>>>>>
>>>>> On 28/11/2018 19:36, Brendan Higgins wrote:
>>>>>> The ultimate goal is to create minimal isolated test binaries; in the
>>>>>> meantime we are using UML to provide the infrastructure to run tests, so
>>>>>> define an abstract way to configure and run tests that allow us to
>>>>>> change the context in which tests are built without affecting the user.
>>>>>> This also makes pretty and dynamic error reporting, and a lot of other
>>>>>> nice features easier.
>>>>>
>>>>>
>>>>> I wonder if we could somehow generate a shared library object
>>>>> 'libkernel' or 'libumlinux' from a UM configured set of headers and
>>>>> objects so that we could create binary targets directly ?
>>>>
>>>> That's an interesting idea. I think it would be difficult to figure
>>>> out exactly where to draw the line of what goes in there and what
>>>> needs to be built specific to a test a priori. Of course, that leads
>>>> into the biggest problem in general, needed to know what I need to
>>>> build to test the thing that I want to test.
>>>>
>>>> Nevertheless, I could definitely imagine that being useful in a lot of cases.
>>>
>>> Whether or not we can abstract away the kernel into such a mechanism
>>> with uml libraries is a good question worth exploring.
>>>
>>> Developers working upstream do modify their kernels a lot, so we'd have
>>> to update such libraries quite a bit, but I think that's fine too. The
>>> *real* value I think from the above suggestion would be enterprise /
>>> mobile distros or stable kernel maintainers which have a static kernel
>>> they need to support for a relatively *long time*, consider a 10 year
>>> time frame. Running unit tests without qemu with uml and libraries for
>>> respective kernels seems real worthy.
>>
>> I think any such library might be something generated by the kernel
>> build system, so if someone makes substantial changes to a core
>> component provided by the library - it can be up to them to build a
>> corresponding userspace library as well.
>>
>> We could also consider to only provide *static* libraries rather than
>> dynamic. So any one building some userspace tool / test with this would
>> be required to compile against (the version of) the kernel they expect
>> perhaps... - much like we expect modules to be compiled currently.
>>
>> And then the userspace binary would be sufficiently able to live it's
>> life on it's own :)
>>
>>> The overhead for testing a unit test for said targets, *ideally*, would
>>> just be to to reboot into the system with such libraries available, a
>>> unit test would just look for the respective uname -r library and mimic
>>> that kernel, much the same way enterprise distributions today rely on
>>> having debugging symbols available to run against crash / gdb. Having
>>> debug modules / kernel for crash requires such effort already, so this
>>> would just be an extra layer of other prospect tests.
>>
>> Oh - although, yes - there are some good concepts there - but I'm a bit
>> weary of how easy it would be to 'run' the said test against multiple
>> kernel version libraries... there would be a lot of possible ABI
>> conflicts perhaps.
>>
>> My main initial idea for a libumlinux is to provide infrastructure such
>> as our linked-lists and other kernel formatting so that we can take
>> kernel code directly to userspace for test and debug (assuming that
>> there are no hardware dependencies or things that we can't mock out)
>>
>> I think all of this could complement kunit of course - this isn't
>> suggesting an alternative implementation :-)
> 
> I suspect the reason Luis cc'd me on this is that we already have some
> artisinally-crafted userspace kernel-mocking interfaces under tools/.

Aha - excellent - I had hoped to grab you at Plumbers to talk about
this, after hearing you mention something at your Xarray talk - but
didn't seem to find a suitable time.

> The tools/testing/radix-tree directory is the source of some of this,
> but I've been moving pieces out into tools/ more generally where it
> makes sense to.

Sounds like we already have a starting point then.


> We have liburcu already, which is good.  The main sticking points are:
> 
>  - No emulation of kernel thread interfaces

Scheduling finesse aside, this shouldn't be too hard to emulate/wrap
with pthreads?


>  - The kernel does not provide the ability to aggressively fail memory
>    allocations (which is useful when trying to exercise the memory failure
>    paths).

Fault injection throughout would certainly be a valuable addition to any
unit-testing.

Wrapping tests into a single userspace binary could facilitate further
memory leak checking or other valgrind facilities too.



>  - printk has started adding a lot of %pX enhancements which printf
>    obviously doesn't know about.

Wrapping through User-mode linux essentially provides this already
though. In fact I guess that goes for the thread interfaces topic above too.


>  - No global pseudo-random number generator in the kernel.  Probably
>    we should steal the i915 one.
> 
> I know Dan Williams has also done a lot of working mocking kernel
> interfaces for libnvdimm.


Thanks for the references - more to investigate.
Kent Overstreet Dec. 7, 2018, 6:35 p.m. UTC | #11
On Thu, Dec 06, 2018 at 12:32:47PM +0000, Kieran Bingham wrote:
> Oh - although, yes - there are some good concepts there - but I'm a bit
> weary of how easy it would be to 'run' the said test against multiple
> kernel version libraries... there would be a lot of possible ABI
> conflicts perhaps.
> 
> My main initial idea for a libumlinux is to provide infrastructure such
> as our linked-lists and other kernel formatting so that we can take
> kernel code directly to userspace for test and debug (assuming that
> there are no hardware dependencies or things that we can't mock out)

I think this would be a really wonderful thing to make happen, and could
potentially be much more widely useful than for just running tests, by making
it easier to share code between both kernel and userspace.

For bcachefs I've got a shim layer that lets me build almost everything in
fs/bcachefs and use it as a library in the userspace bcachefs-tools - e.g. for
fsck and migrate. Mine was a quick and dirty hack, but even so it's been
_extremely_ useful and a major success - I think if this became something more
official a lot of uses would be found for it.

I'm not sure if you've actually started on this (haven't seen most of the thread
yet), but if any of the bcachefs-tools shim code is useful feel free to steal it
- I've got dirt-simple, minimum viable shims for the kthread api, workqueues,
timers, the block layer, and assorted other stuff:

https://evilpiepirate.org/git/bcachefs-tools.git/

Going forward, one issue is going to be that a libumlinux is going to want to
shim some interfaces, and for other things it'll just want to pull in the kernel
implementation - e.g. rhashtables. It might be nice if we could refactor things
a bit so that things like rhashtables could be built as a standalone library, as
is.
Petr Mladek Dec. 11, 2018, 2:09 p.m. UTC | #12
On Thu 2018-12-06 07:37:18, Matthew Wilcox wrote:
> On Thu, Dec 06, 2018 at 12:32:47PM +0000, Kieran Bingham wrote:
> > On 04/12/2018 20:47, Luis Chamberlain wrote:
> > > On Mon, Dec 03, 2018 at 03:48:15PM -0800, Brendan Higgins wrote:
> > >> On Thu, Nov 29, 2018 at 5:54 AM Kieran Bingham
> > >> <kieran.bingham@ideasonboard.com> wrote:
> > > Developers working upstream do modify their kernels a lot, so we'd have
> > > to update such libraries quite a bit, but I think that's fine too. The
> > > *real* value I think from the above suggestion would be enterprise /
> > > mobile distros or stable kernel maintainers which have a static kernel
> > > they need to support for a relatively *long time*, consider a 10 year
> > > time frame. Running unit tests without qemu with uml and libraries for
> > > respective kernels seems real worthy.
> > 
> > I think any such library might be something generated by the kernel
> > build system, so if someone makes substantial changes to a core
> > component provided by the library - it can be up to them to build a
> > corresponding userspace library as well.
> > 
> > My main initial idea for a libumlinux is to provide infrastructure such
> > as our linked-lists and other kernel formatting so that we can take
> > kernel code directly to userspace for test and debug (assuming that
> > there are no hardware dependencies or things that we can't mock out)
> 
> We have liburcu already, which is good.  The main sticking points are:
> 
>  - printk has started adding a lot of %pX enhancements which printf
>    obviously doesn't know about.

I wonder how big a problem it is and if it is worth using another
approach.

An alternative would be to replace them with helper functions
that would produce the same string. The meaning would be easier
to understand. But concatenating with the surrounding text
would be less elegant. People might start using pr_cont(),
which is problematic (mixed lines).

Also the %pX formats are mostly used to print context of some
structures. Even the helper functions would need some maintenance
to keep them compatible.

BTW: The printk() feature was introduced 10 years ago by
the commit 4d8a743cdd2690c0bc8 ("vsprintf: add infrastructure
support for extended '%p' specifiers").

Best Regards,
Petr
Steven Rostedt Dec. 11, 2018, 2:41 p.m. UTC | #13
On Tue, 11 Dec 2018 15:09:26 +0100
Petr Mladek <pmladek@suse.com> wrote:

> > We have liburcu already, which is good.  The main sticking points are:
> > 
> >  - printk has started adding a lot of %pX enhancements which printf
> >    obviously doesn't know about.  
> 
> I wonder how big problem it is and if it is worth using another
> approach.

No, please do not change the %pX approach.

> 
> An alternative would be to replace them with helper functions
> the would produce the same string. The meaning would be easier
> to understand. But concatenating with the surrounding text
> would be less elegant. People might start using pr_cont()
> that is problematic (mixed lines).
> 
> Also the %pX formats are mostly used to print context of some
> structures. Even the helper functions would need some maintenance
> to keep them compatible.
> 
> BTW: The printk() feature has been introduced 10 years ago by
> the commit 4d8a743cdd2690c0bc8 ("vsprintf: add infrastructure
> support for extended '%p' specifiers").

trace-cmd and perf know about most of the %pX data and how to read it.
Perhaps we can extend the libtraceevent library to export a generic way
to read data from printk() output for other tools to use.

-- Steve
Anton Ivanov Dec. 11, 2018, 5:01 p.m. UTC | #14
On 12/11/18 2:41 PM, Steven Rostedt wrote:
> On Tue, 11 Dec 2018 15:09:26 +0100
> Petr Mladek <pmladek@suse.com> wrote:
>
>>> We have liburcu already, which is good.  The main sticking points are:
>>>
>>>   - printk has started adding a lot of %pX enhancements which printf
>>>     obviously doesn't know about.
>> I wonder how big problem it is and if it is worth using another
>> approach.
> No, please do not change the %pX approach.
>
>> An alternative would be to replace them with helper functions
>> the would produce the same string. The meaning would be easier
>> to understand. But concatenating with the surrounding text
>> would be less elegant. People might start using pr_cont()
>> that is problematic (mixed lines).
>>
>> Also the %pX formats are mostly used to print context of some
>> structures. Even the helper functions would need some maintenance
>> to keep them compatible.
>>
>> BTW: The printk() feature has been introduced 10 years ago by
>> the commit 4d8a743cdd2690c0bc8 ("vsprintf: add infrastructure
>> support for extended '%p' specifiers").
> trace-cmd and perf know about most of the %pX data and how to read it.
> Perhaps we can extend the libtraceevent library to export a generic way
> to read data from printk() output for other tools to use.

Going back for a second to using UML for this. UML console at present is 
interrupt driven - it emulates serial IO using several different 
back-ends (file descriptors, xterm or actual tty/ptys). Epoll events on 
the host side are used to trigger the UML interrupts - both read and write.

This works OK for normal use, but may result in all kinds of interesting 
false positives/false negatives when UML is used to run unit tests 
against a change which changes interrupt behavior.

IMO it may be useful to consider some alternatives specifically for unit 
test coverage purposes where printk and/or the whole console output 
bypass some of the IRQ-driven semantics altogether.
Brendan Higgins Feb. 9, 2019, 12:40 a.m. UTC | #15
On Tue, Dec 11, 2018 at 9:02 AM Anton Ivanov
<anton.ivanov@cambridgegreys.com> wrote:
>
>
> On 12/11/18 2:41 PM, Steven Rostedt wrote:
> > On Tue, 11 Dec 2018 15:09:26 +0100
> > Petr Mladek <pmladek@suse.com> wrote:
> >
> >>> We have liburcu already, which is good.  The main sticking points are:
> >>>
> >>>   - printk has started adding a lot of %pX enhancements which printf
> >>>     obviously doesn't know about.
> >> I wonder how big problem it is and if it is worth using another
> >> approach.
> > No, please do not change the %pX approach.
> >
> >> An alternative would be to replace them with helper functions
> >> the would produce the same string. The meaning would be easier
> >> to understand. But concatenating with the surrounding text
> >> would be less elegant. People might start using pr_cont()
> >> that is problematic (mixed lines).
> >>
> >> Also the %pX formats are mostly used to print context of some
> >> structures. Even the helper functions would need some maintenance
> >> to keep them compatible.
> >>
> >> BTW: The printk() feature has been introduced 10 years ago by
> >> the commit 4d8a743cdd2690c0bc8 ("vsprintf: add infrastructure
> >> support for extended '%p' specifiers").
> > trace-cmd and perf know about most of the %pX data and how to read it.
> > Perhaps we can extend the libtraceevent library to export a generic way
> > to read data from printk() output for other tools to use.
>
> Going back for a second to using UML for this. UML console at present is
> interrupt driven - it emulates serial IO using several different
> back-ends (file descriptors, xterm or actual tty/ptys). Epoll events on
> the host side are used to trigger the UML interrupts - both read and write.
>
> This works OK for normal use, but may result in all kinds of interesting
> false positives/false negatives when UML is used to run unit tests
> against a change which changes interrupt behavior.
>
> IMO it may be useful to consider some alternatives specifically for unit
> test coverage purposes where printk and/or the whole console output
> altogether bypass some of the IRQ driven semantics.

Whoops, sorry, didn't see your comment before I went on vacation.

I completely agree. It is also annoying when trying to test other
really low level parts of the kernel. I would really like to get KUnit
to the point where it does not have any dependencies on anything in
the kernel, but that is very challenging for many reasons. This
loosely relates to what Luis, myself, and others have talked about in
other threads about having a stricter notion of code dependencies in
the kernel. Thinking about it now, I suspect it might be easier to
limit KUnit's dependency on kernel infrastructure first; that could
kind of motivate the later work.

Patch

diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore
new file mode 100644
index 0000000000000..c791ff59a37a9
--- /dev/null
+++ b/tools/testing/kunit/.gitignore
@@ -0,0 +1,3 @@ 
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
\ No newline at end of file
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
new file mode 100644
index 0000000000000..183bd5e758762
--- /dev/null
+++ b/tools/testing/kunit/kunit_config.py
@@ -0,0 +1,60 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+import collections
+import re
+
+CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$'
+CONFIG_PATTERN = r'^CONFIG_\w+=\S+$'
+
+KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry'])
+
+
+class KconfigEntry(KconfigEntryBase):
+
+	def __str__(self) -> str:
+		return self.raw_entry
+
+
+class KconfigParseError(Exception):
+	"""Error parsing Kconfig defconfig or .config."""
+
+
+class Kconfig(object):
+	"""Represents defconfig or .config specified using the Kconfig language."""
+
+	def __init__(self):
+		self._entries = []
+
+	def entries(self):
+		return set(self._entries)
+
+	def add_entry(self, entry: KconfigEntry) -> None:
+		self._entries.append(entry)
+
+	def is_subset_of(self, other: "Kconfig") -> bool:
+		return self.entries().issubset(other.entries())
+
+	def write_to_file(self, path: str) -> None:
+		with open(path, 'w') as f:
+			for entry in self.entries():
+				f.write(str(entry) + '\n')
+
+	def parse_from_string(self, blob: str) -> None:
+		"""Parses a string containing KconfigEntrys and populates this Kconfig."""
+		self._entries = []
+		is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
+		config_matcher = re.compile(CONFIG_PATTERN)
+		for line in blob.split('\n'):
+			line = line.strip()
+			if not line:
+				continue
+			elif config_matcher.match(line) or is_not_set_matcher.match(line):
+				self._entries.append(KconfigEntry(line))
+			elif line[0] == '#':
+				continue
+			else:
+				raise KconfigParseError('Failed to parse: ' + line)
+
+	def read_from_file(self, path: str) -> None:
+		with open(path, 'r') as f:
+			self.parse_from_string(f.read())
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
new file mode 100644
index 0000000000000..bba7ea7ca1869
--- /dev/null
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -0,0 +1,126 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+import logging
+import subprocess
+import os
+
+import kunit_config
+
+KCONFIG_PATH = '.config'
+
+class ConfigError(Exception):
+	"""Represents an error trying to configure the Linux kernel."""
+
+
+class BuildError(Exception):
+	"""Represents an error trying to build the Linux kernel."""
+
+
+class LinuxSourceTreeOperations(object):
+	"""An abstraction over command line operations performed on a source tree."""
+
+	def make_mrproper(self):
+		try:
+			subprocess.check_output(['make', 'mrproper'])
+		except OSError as e:
+			raise ConfigError('Could not call make command: ' + str(e))
+		except subprocess.CalledProcessError as e:
+			raise ConfigError(e.output)
+
+	def make_olddefconfig(self):
+		try:
+			subprocess.check_output(['make', 'ARCH=um', 'olddefconfig'])
+		except OSError as e:
+			raise ConfigError('Could not call make command: ' + str(e))
+		except subprocess.CalledProcessError as e:
+			raise ConfigError(e.output)
+
+	def make(self, jobs):
+		try:
+			subprocess.check_output([
+					'make',
+					'ARCH=um',
+					'--jobs=' + str(jobs)])
+		except OSError as e:
+			raise BuildError('Could not call make: ' + str(e))
+		except subprocess.CalledProcessError as e:
+			raise BuildError(e.output)
+
+	def linux_bin(self, params, timeout):
+		"""Runs the Linux UML binary. Must be named 'linux'."""
+		process = subprocess.Popen(
+			['./linux'] + params,
+			stdin=subprocess.PIPE,
+			stdout=subprocess.PIPE,
+			stderr=subprocess.PIPE)
+		process.wait(timeout=timeout)
+		return process
+
+
+class LinuxSourceTree(object):
+	"""Represents a Linux kernel source tree with KUnit tests."""
+
+	def __init__(self):
+		self._kconfig = kunit_config.Kconfig()
+		self._kconfig.read_from_file('kunitconfig')
+		self._ops = LinuxSourceTreeOperations()
+
+	def clean(self):
+		try:
+			self._ops.make_mrproper()
+		except ConfigError as e:
+			logging.error(e)
+			return False
+		return True
+
+	def build_config(self):
+		self._kconfig.write_to_file(KCONFIG_PATH)
+		try:
+			self._ops.make_olddefconfig()
+		except ConfigError as e:
+			logging.error(e)
+			return False
+		validated_kconfig = kunit_config.Kconfig()
+		validated_kconfig.read_from_file(KCONFIG_PATH)
+		if not self._kconfig.is_subset_of(validated_kconfig):
+			logging.error('Provided Kconfig is not contained in validated .config!')
+			return False
+		return True
+
+	def build_reconfig(self):
+		"""Creates a new .config if it is not a subset of the kunitconfig."""
+		if os.path.exists(KCONFIG_PATH):
+			existing_kconfig = kunit_config.Kconfig()
+			existing_kconfig.read_from_file(KCONFIG_PATH)
+			if not self._kconfig.is_subset_of(existing_kconfig):
+				print('Regenerating .config ...')
+				os.remove(KCONFIG_PATH)
+				return self.build_config()
+			else:
+				return True
+		else:
+			print('Generating .config ...')
+			return self.build_config()
+
+	def build_um_kernel(self, jobs):
+		try:
+			self._ops.make_olddefconfig()
+			self._ops.make(jobs)
+		except (ConfigError, BuildError) as e:
+			logging.error(e)
+			return False
+		used_kconfig = kunit_config.Kconfig()
+		used_kconfig.read_from_file(KCONFIG_PATH)
+		if not self._kconfig.is_subset_of(used_kconfig):
+			logging.error('Provided Kconfig is not contained in final config!')
+			return False
+		return True
+
+	def run_kernel(self, args=[]):
+		timeout = None
+		args.extend(['mem=256M'])
+		process = self._ops.linux_bin(args, timeout)
+		with open('test.log', 'w') as f:
+			for line in process.stdout:
+				f.write(line.rstrip().decode('ascii') + '\n')
+				yield line.rstrip().decode('ascii')
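
For reference, kunit_config.py can be exercised on its own as well; a
minimal sketch of the parse and subset-check workflow the classes above
provide (the option names are only examples):

  import kunit_config

  wanted = kunit_config.Kconfig()
  wanted.parse_from_string('CONFIG_KUNIT=y\n# CONFIG_MMU is not set\n')

  actual = kunit_config.Kconfig()
  actual.read_from_file('.config')

  print(wanted.is_subset_of(actual))  # True if .config contains every entry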