diff mbox series

[5/5] libkmod: Use kernel decompression when available

Message ID 20230601224001.23397-6-lucas.de.marchi@gmail.com (mailing list archive)
State New, archived
Headers show
Series libkmod: Use kernel decompression support | expand

Commit Message

Lucas De Marchi June 1, 2023, 10:40 p.m. UTC
With the recent changes to bypass loading the file it's possible to
reduce the work in userspace and delegating it to the kernel. Without
any compression to illustrate:

Before:
	read(3, "\177ELF\2\1", 6)               = 6
	lseek(3, 0, SEEK_SET)                   = 0
	newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=238592, ...}, AT_EMPTY_PATH) = 0
	mmap(NULL, 238592, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd85cbd1000
	finit_module(3, "", 0)                  = 0
	munmap(0x7fd85cbd1000, 238592)          = 0
	close(3)                                = 0

After:
	read(3, "\177ELF\2\1", 6)               = 6
	lseek(3, 0, SEEK_SET)                   = 0
	finit_module(3, "", 0)                  = 0
	close(3)                                = 0

When using kernel compression now it's also possible to direct libkmod
to take the finit_module() path, avoiding the decompression in userspace
and just delegating it to the kernel.

Before:
	read(3, "(\265/\375\244\0", 6)          = 6
	lseek(3, 0, SEEK_SET)                   = 0
	read(3, "(\265/\375\244", 5)            = 5
	mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3fa431e000
	read(3, "\0\244\3\0\\y\6", 7)           = 7
	mmap(NULL, 372736, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3fa414f000
	brk(0x55944c6a1000)                     = 0x55944c6a1000
	read(3, "\356|\6G\27U\20 \312\260s\211\335\333\263\326\330\336\273O\211\356\306K\360Z\341\374U6\342\221"..., 53038) = 53038
	mremap(0x7f3fa431e000, 135168, 266240, MREMAP_MAYMOVE) = 0x7f3fa410e000
	read(3, ",;\3\nqf\311\362\325\211\7\341\375A\355\221\371L\\\5\7\375 \32\246<(\258=K\304"..., 20851) = 20851
	mremap(0x7f3fa410e000, 266240, 397312, MREMAP_MAYMOVE) = 0x7f3fa40ad000
	read(3, ")\36\250\213", 4)              = 4
	read(3, "", 4)                          = 0
	munmap(0x7f3fa414f000, 372736)          = 0
	init_module(0x7f3fa40ad010, 238592, "") = 0
	munmap(0x7f3fa40ad000, 397312)          = 0
	close(3)                                = 0

After:
	read(3, "(\265/\375\244P", 6)           = 6
	lseek(3, 0, SEEK_SET)                   = 0
	finit_module(3, "", 0x4 /* MODULE_INIT_??? */) = 0
	close(3)                                = 0

Signed-off-by: Lucas De Marchi <lucas.de.marchi@gmail.com>
---
 libkmod/libkmod-file.c     |  4 ++--
 libkmod/libkmod-internal.h |  3 ++-
 libkmod/libkmod-module.c   | 17 +++++++++++++----
 libkmod/libkmod.c          |  5 +++++
 4 files changed, 22 insertions(+), 7 deletions(-)

Comments

Luis Chamberlain June 6, 2023, 6:38 p.m. UTC | #1
On Thu, Jun 01, 2023 at 03:40:01PM -0700, Lucas De Marchi wrote:
> With the recent changes to bypass loading the file it's possible to
> reduce the work in userspace and delegating it to the kernel. Without
> any compression to illustrate:
> 
> Before:
> 	read(3, "\177ELF\2\1", 6)               = 6
> 	lseek(3, 0, SEEK_SET)                   = 0
> 	newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=238592, ...}, AT_EMPTY_PATH) = 0
> 	mmap(NULL, 238592, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd85cbd1000
> 	finit_module(3, "", 0)                  = 0
> 	munmap(0x7fd85cbd1000, 238592)          = 0
> 	close(3)                                = 0
> 
> After:
> 	read(3, "\177ELF\2\1", 6)               = 6
> 	lseek(3, 0, SEEK_SET)                   = 0
> 	finit_module(3, "", 0)                  = 0
> 	close(3)                                = 0

It's not clear to me how the patches did the above, in particular
avoiding the newfstatat() for the non-decompression use case.

  Luis
Lucas De Marchi June 6, 2023, 7:01 p.m. UTC | #2
On Tue, Jun 06, 2023 at 11:38:35AM -0700, Luis Chamberlain wrote:
>On Thu, Jun 01, 2023 at 03:40:01PM -0700, Lucas De Marchi wrote:
>> With the recent changes to bypass loading the file it's possible to
>> reduce the work in userspace and delegating it to the kernel. Without
>> any compression to illustrate:
>>
>> Before:
>> 	read(3, "\177ELF\2\1", 6)               = 6
>> 	lseek(3, 0, SEEK_SET)                   = 0
>> 	newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=238592, ...}, AT_EMPTY_PATH) = 0
>> 	mmap(NULL, 238592, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fd85cbd1000
>> 	finit_module(3, "", 0)                  = 0
>> 	munmap(0x7fd85cbd1000, 238592)          = 0
>> 	close(3)                                = 0
>>
>> After:
>> 	read(3, "\177ELF\2\1", 6)               = 6
>> 	lseek(3, 0, SEEK_SET)                   = 0
>> 	finit_module(3, "", 0)                  = 0
>> 	close(3)                                = 0
>
>It's not clear to me how the patches did the above, in particular
>avoiding the newfstatat() for the non-decompression use case.

because now we are not taking the path to mmap the file anymore.
 From load_reg():

	if (fstat(file->fd, &st) < 0)
		return -errno;

	file->size = st.st_size;
	file->memory = mmap(NULL, file->size, PROT_READ, MAP_PRIVATE,
			    file->fd, 0);

from STAT(2):

	The  underlying  system  call  employed  by the glibc fstatat() wrapper
	function is actually called  fstatat64()  or,  on  some  architectures,
	newfstatat().

With load_reg() not being called anymore, these 2 syscalls are gone.

We still read the header (first 6 bytes as per above), to make sure we select
the right handler for the compression method. In the case above it was uncompressed
("\177ELF\2\1"), so we lseek() and give it to the kernel. If it was
a compression algo matching the one in use by the kernel, we would just add
the compression flag and do the same thing.
If it was a different compression type, then we'd fallback to the
previous handling with mmap() + decompression in usersapce +
init_module().


Lucas De Marchi

>
>  Luis
diff mbox series

Patch

diff --git a/libkmod/libkmod-file.c b/libkmod/libkmod-file.c
index 705770a..ffdda92 100644
--- a/libkmod/libkmod-file.c
+++ b/libkmod/libkmod-file.c
@@ -513,9 +513,9 @@  off_t kmod_file_get_size(const struct kmod_file *file)
 	return file->size;
 }
 
-bool kmod_file_get_direct(const struct kmod_file *file)
+enum kmod_file_compression_type kmod_file_get_compression(const struct kmod_file *file)
 {
-	return file->compression == KMOD_FILE_COMPRESSION_NONE;
+	return file->compression;
 }
 
 int kmod_file_get_fd(const struct kmod_file *file)
diff --git a/libkmod/libkmod-internal.h b/libkmod/libkmod-internal.h
index 7b8a158..edb4eac 100644
--- a/libkmod/libkmod-internal.h
+++ b/libkmod/libkmod-internal.h
@@ -112,6 +112,7 @@  void kmod_pool_add_module(struct kmod_ctx *ctx, struct kmod_module *mod, const c
 void kmod_pool_del_module(struct kmod_ctx *ctx, struct kmod_module *mod, const char *key) __attribute__((nonnull(1, 2, 3)));
 
 const struct kmod_config *kmod_get_config(const struct kmod_ctx *ctx) __attribute__((nonnull(1)));
+enum kmod_file_compression_type kmod_get_kernel_compression(const struct kmod_ctx *ctx) __attribute__((nonnull(1)));
 
 /* libkmod-config.c */
 struct kmod_config_path {
@@ -162,7 +163,7 @@  struct kmod_elf *kmod_file_get_elf(struct kmod_file *file) __attribute__((nonnul
 int kmod_file_load_contents(struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
 void *kmod_file_get_contents(const struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
 off_t kmod_file_get_size(const struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
-bool kmod_file_get_direct(const struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
+enum kmod_file_compression_type kmod_file_get_compression(const struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
 int kmod_file_get_fd(const struct kmod_file *file) _must_check_ __attribute__((nonnull(1)));
 void kmod_file_unref(struct kmod_file *file) __attribute__((nonnull(1)));
 
diff --git a/libkmod/libkmod-module.c b/libkmod/libkmod-module.c
index 6ed5ad4..a9e1be8 100644
--- a/libkmod/libkmod-module.c
+++ b/libkmod/libkmod-module.c
@@ -864,15 +864,24 @@  extern long init_module(const void *mem, unsigned long len, const char *args);
 static int do_finit_module(struct kmod_module *mod, unsigned int flags,
 			   const char *args)
 {
+	enum kmod_file_compression_type compression, kernel_compression;
 	unsigned int kernel_flags = 0;
 	int err;
 
 	/*
-	 * Re-use ENOSYS, returned when there is no such syscall, so the
-	 * fallback to init_module applies
+	 * When module is not compressed or its compression type matches the
+	 * one in use by the kernel, there is no need to read the file
+	 * in userspace. Otherwise, re-use ENOSYS to trigger the same fallback
+	 * as when finit_module() is not supported.
 	 */
-	if (!kmod_file_get_direct(mod->file))
-		return -ENOSYS;
+	compression = kmod_file_get_compression(mod->file);
+	kernel_compression = kmod_get_kernel_compression(mod->ctx);
+	if (!(compression == KMOD_FILE_COMPRESSION_NONE ||
+	      compression == kernel_compression))
+		return ENOSYS;
+
+	if (compression != KMOD_FILE_COMPRESSION_NONE)
+		kernel_flags |= MODULE_INIT_COMPRESSED_FILE;
 
 	if (flags & KMOD_INSERT_FORCE_VERMAGIC)
 		kernel_flags |= MODULE_INIT_IGNORE_VERMAGIC;
diff --git a/libkmod/libkmod.c b/libkmod/libkmod.c
index 103469e..1b8773c 100644
--- a/libkmod/libkmod.c
+++ b/libkmod/libkmod.c
@@ -1016,3 +1016,8 @@  const struct kmod_config *kmod_get_config(const struct kmod_ctx *ctx)
 {
 	return ctx->config;
 }
+
+enum kmod_file_compression_type kmod_get_kernel_compression(const struct kmod_ctx *ctx)
+{
+	return ctx->kernel_compression;
+}