diff mbox series

[bpf-next,1/3] libbpf: Implement basic zip archive parsing support

Message ID 20230217191908.1000004-2-deso@posteo.net (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series libbpf: Make uprobe attachment APK aware | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 8 maintainers not CCed: john.fastabend@gmail.com sdf@google.com jolsa@kernel.org song@kernel.org martin.lau@linux.dev haoluo@google.com yhs@fb.com kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: Prefer using the BIT macro WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: braces {} are not necessary for single statement blocks WARNING: do not add new typedefs WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc

Commit Message

Daniel Müller Feb. 17, 2023, 7:19 p.m. UTC
This change implements support for reading zip archives, including
opening an archive, finding an entry based on its path and name in it,
and closing it.
The code was copied from https://github.com/iovisor/bcc/pull/4440, which
implements similar functionality for bcc. The author confirmed that he
is fine with this usage and the corresponding relicensing. I adjusted it
to adhere to libbpf coding standards.

Signed-off-by: Daniel Müller <deso@posteo.net>
---
 tools/lib/bpf/Build |   2 +-
 tools/lib/bpf/zip.c | 378 ++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/zip.h |  47 ++++++
 3 files changed, 426 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/zip.c
 create mode 100644 tools/lib/bpf/zip.h

Comments

Andrii Nakryiko Feb. 18, 2023, 12:10 a.m. UTC | #1
On Fri, Feb 17, 2023 at 11:19 AM Daniel Müller <deso@posteo.net> wrote:
>
> This change implements support for reading zip archives, including
> opening an archive, finding an entry based on its path and name in it,
> and closing it.
> The code was copied from https://github.com/iovisor/bcc/pull/4440, which
> implements similar functionality for bcc. The author confirmed that he
> is fine with this usage and the corresponding relicensing. I adjusted it
> to adhere to libbpf coding standards.

Let's record this with

Acked-by: Michał Gregorczyk <michalgr@meta.com>

tag added next to your Signed-off-by. I've added Michal to cc for visibility.

>
> Signed-off-by: Daniel Müller <deso@posteo.net>
> ---
>  tools/lib/bpf/Build |   2 +-
>  tools/lib/bpf/zip.c | 378 ++++++++++++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/zip.h |  47 ++++++
>  3 files changed, 426 insertions(+), 1 deletion(-)
>  create mode 100644 tools/lib/bpf/zip.c
>  create mode 100644 tools/lib/bpf/zip.h
>
> diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
> index 5a3dfb..b8b0a63 100644
> --- a/tools/lib/bpf/Build
> +++ b/tools/lib/bpf/Build
> @@ -1,4 +1,4 @@
>  libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
>             netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
>             btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
> -           usdt.o
> +           usdt.o zip.o
> diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
> new file mode 100644
> index 0000000..59ec79
> --- /dev/null
> +++ b/tools/lib/bpf/zip.c
> @@ -0,0 +1,378 @@
> +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> +/*
> + * Routines for dealing with .zip archives.
> + *
> + * Copyright (c) Meta Platforms, Inc. and affiliates.
> + */
> +
> +#include <fcntl.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +
> +#include "zip.h"
> +
> +/* Specification of ZIP file format can be found here:
> + * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
> + * For a high level overview of the structure of a ZIP file see
> + * sections 4.3.1 - 4.3.6.
> + *
> + * Data structures appearing in ZIP files do not contain any
> + * padding and they might be misaligned. To allow us to safely
> + * operate on pointers to such structures and their members, without
> + * worrying of platform specific alignment issues, we define
> + * unaligned_uint16_t and unaligned_uint32_t types with no alignment
> + * requirements.
> + */
> +typedef struct {
> +       uint8_t raw[2];
> +} unaligned_uint16_t;
> +
> +static uint16_t unaligned_uint16_read(unaligned_uint16_t value)
> +{
> +       uint16_t return_value;
> +
> +       memcpy(&return_value, value.raw, sizeof(return_value));
> +       return return_value;
> +}
> +
> +typedef struct {
> +       uint8_t raw[4];
> +} unaligned_uint32_t;
> +
> +static uint32_t unaligned_uint32_read(unaligned_uint32_t value)
> +{
> +       uint32_t return_value;
> +
> +       memcpy(&return_value, value.raw, sizeof(return_value));
> +       return return_value;
> +}

it feels like an overkill to use this unaligned_xxx_t approach. Let's
use __attribute__((packed)) on corresponding structs and just
uint16_t/uint32_t field types.

> +
> +#define END_OF_CD_RECORD_MAGIC 0x06054b50
> +
> +/* See section 4.3.16 of the spec. */
> +struct end_of_central_directory_record {
> +       /* Magic value equal to END_OF_CD_RECORD_MAGIC */
> +       unaligned_uint32_t magic;
> +
> +       /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
> +        * Zip archive might span multiple files (disks).
> +        */
> +       unaligned_uint16_t this_disk;
> +
> +       /* Number of the file containing the beginning of the central directory or
> +        * 0xFFFF if ZIP64 archive.
> +        */
> +       unaligned_uint16_t cd_disk;
> +
> +       /* Number of central directory records on this disk or 0xFFFF if ZIP64
> +        * archive.
> +        */
> +       unaligned_uint16_t cd_records;
> +
> +       /* Number of central directory records on all disks or 0xFFFF if ZIP64
> +        * archive.
> +        */
> +       unaligned_uint16_t cd_records_total;
> +
> +       /* Size of the central directory recrod or 0xFFFFFFFF if ZIP64 archive. */

typo: record

> +       unaligned_uint32_t cd_size;
> +
> +       /* Offset of the central directory from the beginning of the archive or
> +        * 0xFFFFFFFF if ZIP64 archive.
> +        */
> +       unaligned_uint32_t cd_offset;
> +
> +       /* Length of comment data following end of central driectory record. */

typo: directory


> +       unaligned_uint16_t comment_length;
> +
> +       /* Up to 64k of arbitrary bytes. */
> +       /* uint8_t comment[comment_length] */
> +};
> +
> +#define CD_FILE_HEADER_MAGIC 0x02014b50
> +#define FLAG_ENCRYPTED (1 << 0)
> +#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
> +
> +/* See section 4.3.12 of the spec. */
> +struct central_directory_file_header {


naming nit here and below: we use CD in constants, but spell out
"central_directory", which feels a bit mouthful. Let's use "cd"
consistently throughout (even if it's a bit confusing with "change
directory", but I think in the context of parsing ZIP contents should
be recognizable enough)

> +       /* Magic value equal to CD_FILE_HEADER_MAGIC. */
> +       unaligned_uint32_t magic;
> +       unaligned_uint16_t version;
> +       /* Minimum zip version needed to extract the file. */
> +       unaligned_uint16_t min_version;
> +       unaligned_uint16_t flags;
> +       unaligned_uint16_t compression;
> +       unaligned_uint16_t last_modified_time;
> +       unaligned_uint16_t last_modified_date;
> +       unaligned_uint32_t crc;
> +       unaligned_uint32_t compressed_size;
> +       unaligned_uint32_t uncompressed_size;
> +       unaligned_uint16_t file_name_length;
> +       unaligned_uint16_t extra_field_length;
> +       unaligned_uint16_t file_comment_length;
> +       /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
> +       unaligned_uint16_t disk;
> +       unaligned_uint16_t internal_attributes;
> +       unaligned_uint32_t external_attributes;
> +       /* Offset from the start of the disk containing the local file header to the
> +        * start of the local file header.
> +        */
> +       unaligned_uint32_t offset;
> +};
> +
> +#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
> +
> +/* See section 4.3.7 of the spec. */
> +struct local_file_header {
> +       /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
> +       unaligned_uint32_t magic;
> +       /* Minimum zip version needed to extract the file. */
> +       unaligned_uint16_t min_version;
> +       unaligned_uint16_t flags;
> +       unaligned_uint16_t compression;
> +       unaligned_uint16_t last_modified_time;
> +       unaligned_uint16_t last_modified_date;
> +       unaligned_uint32_t crc;
> +       unaligned_uint32_t compressed_size;
> +       unaligned_uint32_t uncompressed_size;
> +       unaligned_uint16_t file_name_length;
> +       unaligned_uint16_t extra_field_length;
> +};
> +
> +struct zip_archive {
> +       void *data;
> +       uint32_t size;
> +       uint32_t cd_offset;
> +       uint32_t cd_records;
> +};
> +

$ rg -w 'uint\d\d_t' | wc -l
21
$ rg -w '__u\d\d' | wc -l
873

seems like we overwhelmingly prefer __u32/__u16 in libbpf code base,
let's use those instead?

> +static void *check_access(struct zip_archive *archive, uint32_t offset, uint32_t size)
> +{
> +       if (offset + size > archive->size || offset > offset + size) {
> +               return NULL;
> +       }
> +       return archive->data + offset;
> +}
> +
> +/* Returns 0 on success, -1 on error and -2 if the eocd indicates

let's use -EINVAL and -ENOTSUP instead of -1 and -2

> + * the archive uses features which are not supported.
> + */
> +static int try_parse_end_of_central_directory(struct zip_archive *archive, uint32_t offset)
> +{
> +       struct end_of_central_directory_record *eocd =
> +               check_access(archive, offset, sizeof(struct end_of_central_directory_record));
> +       uint16_t comment_length, cd_records;
> +       uint32_t cd_offset, cd_size;
> +
> +       if (!eocd || unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) {
> +               return -1;
> +       }
> +
> +       comment_length = unaligned_uint16_read(eocd->comment_length);
> +       if (offset + sizeof(struct end_of_central_directory_record) + comment_length !=
> +           archive->size) {
> +               return -1;
> +       }
> +
> +       cd_records = unaligned_uint16_read(eocd->cd_records);
> +       if (unaligned_uint16_read(eocd->this_disk) != 0 ||
> +           unaligned_uint16_read(eocd->cd_disk) != 0 ||
> +           unaligned_uint16_read(eocd->cd_records_total) != cd_records) {
> +               /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
> +               return -2;
> +       }
> +
> +       cd_offset = unaligned_uint32_read(eocd->cd_offset);
> +       cd_size = unaligned_uint32_read(eocd->cd_size);
> +       if (!check_access(archive, cd_offset, cd_size)) {
> +               return -1;
> +       }
> +
> +       archive->cd_offset = cd_offset;
> +       archive->cd_records = cd_records;
> +       return 0;
> +}
> +
> +static int find_central_directory(struct zip_archive *archive)
> +{
> +       uint32_t offset;
> +       int64_t limit;
> +       int rc = -1;
> +
> +       if (archive->size <= sizeof(struct end_of_central_directory_record)) {
> +               return -1;
> +       }
> +
> +       /* Because the end of central directory ends with a variable length array of
> +        * up to 0xFFFF bytes we can't know exactly where it starts and need to
> +        * search for it at the end of the file, scanning the (limit, offset] range.
> +        */
> +       offset = archive->size - sizeof(struct end_of_central_directory_record);
> +       limit = (int64_t)offset - (1 << 16);
> +
> +       for (; offset >= 0 && offset > limit && rc == -1; offset--) {
> +               rc = try_parse_end_of_central_directory(archive, offset);
> +       }
> +
> +       return rc;
> +}
> +
> +struct zip_archive *zip_archive_open(const char *path)
> +{
> +       struct zip_archive *archive;
> +       int fd = open(path, O_RDONLY);

let's not do complicated operations during variable initialization,
let's just init fd right before checking it for <0

> +       off_t size;
> +       void *data;
> +
> +       if (fd < 0) {
> +               return NULL;
> +       }

no {} for single-line if statement

> +
> +       size = lseek(fd, 0, SEEK_END);
> +       if (size == (off_t)-1 || size > UINT32_MAX) {
> +               close(fd);
> +               return NULL;
> +       }
> +
> +       data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
> +       close(fd);
> +
> +       if (data == MAP_FAILED) {
> +               return NULL;
> +       }

ditto about {}

> +
> +       archive = malloc(sizeof(struct zip_archive));
> +       if (!archive) {
> +               munmap(data, size);
> +               return NULL;
> +       };
> +
> +       archive->data = data;
> +       archive->size = size;
> +       if (find_central_directory(archive)) {
> +               munmap(data, size);
> +               free(archive);
> +               archive = NULL;

return NULL; ?

> +       }
> +
> +       return archive;
> +}
> +
> +void zip_archive_close(struct zip_archive *archive)
> +{
> +       munmap(archive->data, archive->size);
> +       free(archive);
> +}
> +
> +static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
> +                                                            uint32_t offset)

is there a non-local file in ZIP archive? Seems a bit too verbose,
just "file_header_at_offset"? and seeing "get_entry_at_offset" below,
should it be "get_file_header_at_offset" then?

> +{
> +       struct local_file_header *lfh =
> +               check_access(archive, offset, sizeof(struct local_file_header));

empty line between variables and code

> +       if (!lfh || unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) {
> +               return NULL;
> +       }

here and below, same {} issue

> +       return lfh;
> +}
> +
> +static int get_entry_at_offset(struct zip_archive *archive, uint32_t offset, struct zip_entry *out)
> +{
> +       struct local_file_header *lfh = local_file_header_at_offset(archive, offset);

same as above, let's not do extensive operations during variable
initialization, please split

> +       uint16_t flags, name_length, extra_field_length;
> +       uint32_t compressed_size;
> +       const char *name;
> +       void *data;
> +
> +       offset += sizeof(struct local_file_header);

logically this probably should happen after we checked lfh for NULL?

> +       if (!lfh) {
> +               return -1;

here and everywhere, let's use kernel error code constants, e.g., -EINVAL

> +       };
> +
> +       flags = unaligned_uint16_read(lfh->flags);
> +       if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) {
> +               return -1;
> +       }
> +
> +       name_length = unaligned_uint16_read(lfh->file_name_length);
> +       name = check_access(archive, offset, name_length);
> +       offset += name_length;
> +       if (!name) {
> +               return -1;
> +       }
> +
> +       extra_field_length = unaligned_uint16_read(lfh->extra_field_length);
> +       if (!check_access(archive, offset, extra_field_length)) {
> +               return -1;
> +       }
> +       offset += extra_field_length;
> +
> +       compressed_size = unaligned_uint32_read(lfh->compressed_size);

why not fill out out->data_length instead of all these local variables?

> +       data = check_access(archive, offset, compressed_size);
> +       if (!data) {
> +               return -1;
> +       }
> +
> +       out->compression = unaligned_uint16_read(lfh->compression);
> +       out->name_length = name_length;
> +       out->name = name;
> +       out->data = data;
> +       out->data_length = compressed_size;
> +       out->data_offset = offset;
> +
> +       return 0;
> +}
> +
> +static struct central_directory_file_header *cd_file_header_at_offset(struct zip_archive *archive,
> +                                                                     uint32_t offset)

this function is called just once below, why a helper?

> +{
> +       struct central_directory_file_header *cdfh =
> +               check_access(archive, offset, sizeof(struct central_directory_file_header));

empty line

> +       if (!cdfh || unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) {
> +               return NULL;
> +       }
> +       return cdfh;
> +}
> +
> +int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
> +                          struct zip_entry *out)
> +{
> +       size_t file_name_length = strlen(file_name);
> +

no need for empty line, let's keep variable declarations in one block

> +       uint32_t i, offset = archive->cd_offset;
> +
> +       for (i = 0; i < archive->cd_records; ++i) {
> +               struct central_directory_file_header *cdfh =
> +                       cd_file_header_at_offset(archive, offset);
> +               uint16_t cdfh_name_length, cdfh_flags;
> +               const char *cdfh_name;
> +
> +               offset += sizeof(struct central_directory_file_header);

same, logically offset should be updated after we made sure we have
cd_file_header

> +               if (!cdfh) {
> +                       return -1;
> +               }
> +
> +               cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length);
> +               cdfh_name = check_access(archive, offset, cdfh_name_length);
> +               if (!cdfh_name) {
> +                       return -1;
> +               }
> +
> +               cdfh_flags = unaligned_uint16_read(cdfh->flags);
> +               if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
> +                   (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
> +                   file_name_length == cdfh_name_length &&
> +                   memcmp(file_name, archive->data + offset, file_name_length) == 0) {
> +                       return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
> +                                                  out);
> +               }
> +
> +               offset += cdfh_name_length;
> +               offset += unaligned_uint16_read(cdfh->extra_field_length);
> +               offset += unaligned_uint16_read(cdfh->file_comment_length);
> +       }
> +
> +       return -1;
> +}
> diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
> new file mode 100644
> index 0000000..a9083f
> --- /dev/null
> +++ b/tools/lib/bpf/zip.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> +
> +#ifndef __LIBBPF_ZIP_H
> +#define __LIBBPF_ZIP_H
> +
> +#include <stdint.h>
> +
> +/* Represents an opened zip archive.

s/opened/open/


> + * Only basic ZIP files are supported, in particular the following are not
> + * supported:
> + * - encryption
> + * - streaming
> + * - multi-part ZIP files
> + * - ZIP64
> + */
> +struct zip_archive;
> +
> +/* Carries information on name, compression method, and data corresponding to a
> + * file in a zip archive.
> + */
> +struct zip_entry {
> +       /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
> +       uint16_t compression;
> +
> +       /* Non-null terminated name of the file. */
> +       const char *name;
> +       /* Length of the file name. */
> +       uint16_t name_length;
> +
> +       /* Pointer to the file data. */
> +       const void *data;
> +       /* Length of the file data. */
> +       uint32_t data_length;
> +       /* Offset of the file data within the archive. */
> +       uint32_t data_offset;
> +};
> +
> +/* Open a zip archive. Returns NULL in case of an error. */
> +struct zip_archive *zip_archive_open(const char *path);
> +
> +/* Close a zip archive and release resources. */
> +void zip_archive_close(struct zip_archive *archive);
> +
> +/* Look up an entry corresponding to a file in given zip archive. */
> +int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
> +
> +#endif
> --
> 2.30.2
>
Daniel Müller Feb. 21, 2023, 7:20 p.m. UTC | #2
On Fri, Feb 17, 2023 at 04:10:04PM -0800, Andrii Nakryiko wrote:
> On Fri, Feb 17, 2023 at 11:19 AM Daniel Müller <deso@posteo.net> wrote:
> >
> > This change implements support for reading zip archives, including
> > opening an archive, finding an entry based on its path and name in it,
> > and closing it.
> > The code was copied from https://github.com/iovisor/bcc/pull/4440, which
> > implements similar functionality for bcc. The author confirmed that he
> > is fine with this usage and the corresponding relicensing. I adjusted it
> > to adhere to libbpf coding standards.
> 
> Let's record this with
> 
> Acked-by: Michał Gregorczyk <michalgr@meta.com>
> 
> tag added next to your Signed-off-by. I've added Michal to cc for visibility.
> 
> >
> > Signed-off-by: Daniel Müller <deso@posteo.net>
> > ---
> >  tools/lib/bpf/Build |   2 +-
> >  tools/lib/bpf/zip.c | 378 ++++++++++++++++++++++++++++++++++++++++++++
> >  tools/lib/bpf/zip.h |  47 ++++++
> >  3 files changed, 426 insertions(+), 1 deletion(-)
> >  create mode 100644 tools/lib/bpf/zip.c
> >  create mode 100644 tools/lib/bpf/zip.h
> >
> > diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
> > index 5a3dfb..b8b0a63 100644
> > --- a/tools/lib/bpf/Build
> > +++ b/tools/lib/bpf/Build
> > @@ -1,4 +1,4 @@
> >  libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
> >             netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
> >             btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
> > -           usdt.o
> > +           usdt.o zip.o
> > diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
> > new file mode 100644
> > index 0000000..59ec79
> > --- /dev/null
> > +++ b/tools/lib/bpf/zip.c
> > @@ -0,0 +1,378 @@
> > +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> > +/*
> > + * Routines for dealing with .zip archives.
> > + *
> > + * Copyright (c) Meta Platforms, Inc. and affiliates.
> > + */
> > +
> > +#include <fcntl.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/mman.h>
> > +#include <unistd.h>
> > +
> > +#include "zip.h"
> > +
> > +/* Specification of ZIP file format can be found here:
> > + * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
> > + * For a high level overview of the structure of a ZIP file see
> > + * sections 4.3.1 - 4.3.6.
> > + *
> > + * Data structures appearing in ZIP files do not contain any
> > + * padding and they might be misaligned. To allow us to safely
> > + * operate on pointers to such structures and their members, without
> > + * worrying of platform specific alignment issues, we define
> > + * unaligned_uint16_t and unaligned_uint32_t types with no alignment
> > + * requirements.
> > + */
> > +typedef struct {
> > +       uint8_t raw[2];
> > +} unaligned_uint16_t;
> > +
> > +static uint16_t unaligned_uint16_read(unaligned_uint16_t value)
> > +{
> > +       uint16_t return_value;
> > +
> > +       memcpy(&return_value, value.raw, sizeof(return_value));
> > +       return return_value;
> > +}
> > +
> > +typedef struct {
> > +       uint8_t raw[4];
> > +} unaligned_uint32_t;
> > +
> > +static uint32_t unaligned_uint32_read(unaligned_uint32_t value)
> > +{
> > +       uint32_t return_value;
> > +
> > +       memcpy(&return_value, value.raw, sizeof(return_value));
> > +       return return_value;
> > +}
> 
> it feels like an overkill to use this unaligned_xxx_t approach. Let's
> use __attribute__((packed)) on corresponding structs and just
> uint16_t/uint32_t field types.
> 
> > +
> > +#define END_OF_CD_RECORD_MAGIC 0x06054b50
> > +
> > +/* See section 4.3.16 of the spec. */
> > +struct end_of_central_directory_record {
> > +       /* Magic value equal to END_OF_CD_RECORD_MAGIC */
> > +       unaligned_uint32_t magic;
> > +
> > +       /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
> > +        * Zip archive might span multiple files (disks).
> > +        */
> > +       unaligned_uint16_t this_disk;
> > +
> > +       /* Number of the file containing the beginning of the central directory or
> > +        * 0xFFFF if ZIP64 archive.
> > +        */
> > +       unaligned_uint16_t cd_disk;
> > +
> > +       /* Number of central directory records on this disk or 0xFFFF if ZIP64
> > +        * archive.
> > +        */
> > +       unaligned_uint16_t cd_records;
> > +
> > +       /* Number of central directory records on all disks or 0xFFFF if ZIP64
> > +        * archive.
> > +        */
> > +       unaligned_uint16_t cd_records_total;
> > +
> > +       /* Size of the central directory recrod or 0xFFFFFFFF if ZIP64 archive. */
> 
> typo: record
> 
> > +       unaligned_uint32_t cd_size;
> > +
> > +       /* Offset of the central directory from the beginning of the archive or
> > +        * 0xFFFFFFFF if ZIP64 archive.
> > +        */
> > +       unaligned_uint32_t cd_offset;
> > +
> > +       /* Length of comment data following end of central driectory record. */
> 
> typo: directory
> 
> 
> > +       unaligned_uint16_t comment_length;
> > +
> > +       /* Up to 64k of arbitrary bytes. */
> > +       /* uint8_t comment[comment_length] */
> > +};
> > +
> > +#define CD_FILE_HEADER_MAGIC 0x02014b50
> > +#define FLAG_ENCRYPTED (1 << 0)
> > +#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
> > +
> > +/* See section 4.3.12 of the spec. */
> > +struct central_directory_file_header {
> 
> 
> naming nit here and below: we use CD in constants, but spell out
> "central_directory", which feels a bit mouthful. Let's use "cd"
> consistently throughout (even if it's a bit confusing with "change
> directory", but I think in the context of parsing ZIP contents should
> be recognizable enough)

Sure.

> > +       /* Magic value equal to CD_FILE_HEADER_MAGIC. */
> > +       unaligned_uint32_t magic;
> > +       unaligned_uint16_t version;
> > +       /* Minimum zip version needed to extract the file. */
> > +       unaligned_uint16_t min_version;
> > +       unaligned_uint16_t flags;
> > +       unaligned_uint16_t compression;
> > +       unaligned_uint16_t last_modified_time;
> > +       unaligned_uint16_t last_modified_date;
> > +       unaligned_uint32_t crc;
> > +       unaligned_uint32_t compressed_size;
> > +       unaligned_uint32_t uncompressed_size;
> > +       unaligned_uint16_t file_name_length;
> > +       unaligned_uint16_t extra_field_length;
> > +       unaligned_uint16_t file_comment_length;
> > +       /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
> > +       unaligned_uint16_t disk;
> > +       unaligned_uint16_t internal_attributes;
> > +       unaligned_uint32_t external_attributes;
> > +       /* Offset from the start of the disk containing the local file header to the
> > +        * start of the local file header.
> > +        */
> > +       unaligned_uint32_t offset;
> > +};
> > +
> > +#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
> > +
> > +/* See section 4.3.7 of the spec. */
> > +struct local_file_header {
> > +       /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
> > +       unaligned_uint32_t magic;
> > +       /* Minimum zip version needed to extract the file. */
> > +       unaligned_uint16_t min_version;
> > +       unaligned_uint16_t flags;
> > +       unaligned_uint16_t compression;
> > +       unaligned_uint16_t last_modified_time;
> > +       unaligned_uint16_t last_modified_date;
> > +       unaligned_uint32_t crc;
> > +       unaligned_uint32_t compressed_size;
> > +       unaligned_uint32_t uncompressed_size;
> > +       unaligned_uint16_t file_name_length;
> > +       unaligned_uint16_t extra_field_length;
> > +};
> > +
> > +struct zip_archive {
> > +       void *data;
> > +       uint32_t size;
> > +       uint32_t cd_offset;
> > +       uint32_t cd_records;
> > +};
> > +
> 
> $ rg -w 'uint\d\d_t' | wc -l
> 21
> $ rg -w '__u\d\d' | wc -l
> 873
> 
> seems like we overwhelmingly prefer __u32/__u16 in libbpf code base,
> let's use those instead?

Sure.

> > +static void *check_access(struct zip_archive *archive, uint32_t offset, uint32_t size)
> > +{
> > +       if (offset + size > archive->size || offset > offset + size) {
> > +               return NULL;
> > +       }
> > +       return archive->data + offset;
> > +}
> > +
> > +/* Returns 0 on success, -1 on error and -2 if the eocd indicates
> 
> let's use -EINVAL and -ENOTSUP instead of -1 and -2

Sure.

> > + * the archive uses features which are not supported.
> > + */
> > +static int try_parse_end_of_central_directory(struct zip_archive *archive, uint32_t offset)
> > +{
> > +       struct end_of_central_directory_record *eocd =
> > +               check_access(archive, offset, sizeof(struct end_of_central_directory_record));
> > +       uint16_t comment_length, cd_records;
> > +       uint32_t cd_offset, cd_size;
> > +
> > +       if (!eocd || unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) {
> > +               return -1;
> > +       }
> > +
> > +       comment_length = unaligned_uint16_read(eocd->comment_length);
> > +       if (offset + sizeof(struct end_of_central_directory_record) + comment_length !=
> > +           archive->size) {
> > +               return -1;
> > +       }
> > +
> > +       cd_records = unaligned_uint16_read(eocd->cd_records);
> > +       if (unaligned_uint16_read(eocd->this_disk) != 0 ||
> > +           unaligned_uint16_read(eocd->cd_disk) != 0 ||
> > +           unaligned_uint16_read(eocd->cd_records_total) != cd_records) {
> > +               /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
> > +               return -2;
> > +       }
> > +
> > +       cd_offset = unaligned_uint32_read(eocd->cd_offset);
> > +       cd_size = unaligned_uint32_read(eocd->cd_size);
> > +       if (!check_access(archive, cd_offset, cd_size)) {
> > +               return -1;
> > +       }
> > +
> > +       archive->cd_offset = cd_offset;
> > +       archive->cd_records = cd_records;
> > +       return 0;
> > +}
> > +
> > +static int find_central_directory(struct zip_archive *archive)
> > +{
> > +       uint32_t offset;
> > +       int64_t limit;
> > +       int rc = -1;
> > +
> > +       if (archive->size <= sizeof(struct end_of_central_directory_record)) {
> > +               return -1;
> > +       }
> > +
> > +       /* Because the end of central directory ends with a variable length array of
> > +        * up to 0xFFFF bytes we can't know exactly where it starts and need to
> > +        * search for it at the end of the file, scanning the (limit, offset] range.
> > +        */
> > +       offset = archive->size - sizeof(struct end_of_central_directory_record);
> > +       limit = (int64_t)offset - (1 << 16);
> > +
> > +       for (; offset >= 0 && offset > limit && rc == -1; offset--) {
> > +               rc = try_parse_end_of_central_directory(archive, offset);
> > +       }
> > +
> > +       return rc;
> > +}
> > +
> > +struct zip_archive *zip_archive_open(const char *path)
> > +{
> > +       struct zip_archive *archive;
> > +       int fd = open(path, O_RDONLY);
> 
> let's not do complicated operations during variable initialization,
> let's just init fd right before checking it for <0
> 
> > +       off_t size;
> > +       void *data;
> > +
> > +       if (fd < 0) {
> > +               return NULL;
> > +       }
> 
> no {} for single-line if statement
> 
> > +
> > +       size = lseek(fd, 0, SEEK_END);
> > +       if (size == (off_t)-1 || size > UINT32_MAX) {
> > +               close(fd);
> > +               return NULL;
> > +       }
> > +
> > +       data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
> > +       close(fd);
> > +
> > +       if (data == MAP_FAILED) {
> > +               return NULL;
> > +       }
> 
> ditto about {}
> 
> > +
> > +       archive = malloc(sizeof(struct zip_archive));
> > +       if (!archive) {
> > +               munmap(data, size);
> > +               return NULL;
> > +       };
> > +
> > +       archive->data = data;
> > +       archive->size = size;
> > +       if (find_central_directory(archive)) {
> > +               munmap(data, size);
> > +               free(archive);
> > +               archive = NULL;
> 
> return NULL; ?
> 
> > +       }
> > +
> > +       return archive;
> > +}
> > +
> > +void zip_archive_close(struct zip_archive *archive)
> > +{
> > +       munmap(archive->data, archive->size);
> > +       free(archive);
> > +}
> > +
> > +static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
> > +                                                            uint32_t offset)
> 
> is there a non-local file in ZIP archive? Seems a bit too verbose,
> just "file_header_at_offset"? and seeing "get_entry_at_offset" below,
> should it be "get_file_header_at_offset" then?

It's called that by the standard. See section 4.3.7  Local file header in
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT

> > +{
> > +       struct local_file_header *lfh =
> > +               check_access(archive, offset, sizeof(struct local_file_header));
> 
> empty line between variables and code
> 
> > +       if (!lfh || unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) {
> > +               return NULL;
> > +       }
> 
> here and below, same {} issue
> 
> > +       return lfh;
> > +}
> > +
> > +static int get_entry_at_offset(struct zip_archive *archive, uint32_t offset, struct zip_entry *out)
> > +{
> > +       struct local_file_header *lfh = local_file_header_at_offset(archive, offset);
> 
> same as above, let's not do extensive operations during variable
> initialization, please split
> 
> > +       uint16_t flags, name_length, extra_field_length;
> > +       uint32_t compressed_size;
> > +       const char *name;
> > +       void *data;
> > +
> > +       offset += sizeof(struct local_file_header);
> 
> logically this probably should happen after we checked lfh for NULL?

Sure.

> > +       if (!lfh) {
> > +               return -1;
> 
> here and everywhere, let's use kernel error code constants, e.g., -EINVAL

Sure.

> > +       };
> > +
> > +       flags = unaligned_uint16_read(lfh->flags);
> > +       if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) {
> > +               return -1;
> > +       }
> > +
> > +       name_length = unaligned_uint16_read(lfh->file_name_length);
> > +       name = check_access(archive, offset, name_length);
> > +       offset += name_length;
> > +       if (!name) {
> > +               return -1;
> > +       }
> > +
> > +       extra_field_length = unaligned_uint16_read(lfh->extra_field_length);
> > +       if (!check_access(archive, offset, extra_field_length)) {
> > +               return -1;
> > +       }
> > +       offset += extra_field_length;
> > +
> > +       compressed_size = unaligned_uint32_read(lfh->compressed_size);
> 
> why not fill out out->data_length instead of all these local variables?

Because the function contract is that 'out' is left untouched on error.

> > +       data = check_access(archive, offset, compressed_size);
> > +       if (!data) {
> > +               return -1;
> > +       }
> > +
> > +       out->compression = unaligned_uint16_read(lfh->compression);
> > +       out->name_length = name_length;
> > +       out->name = name;
> > +       out->data = data;
> > +       out->data_length = compressed_size;
> > +       out->data_offset = offset;
> > +
> > +       return 0;
> > +}
> > +
> > +static struct central_directory_file_header *cd_file_header_at_offset(struct zip_archive *archive,
> > +                                                                     uint32_t offset)
> 
> this function is called just once below, why a helper?

Will inline it.

> > +{
> > +       struct central_directory_file_header *cdfh =
> > +               check_access(archive, offset, sizeof(struct central_directory_file_header));
> 
> empty line
> 
> > +       if (!cdfh || unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) {
> > +               return NULL;
> > +       }
> > +       return cdfh;
> > +}
> > +
> > +int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
> > +                          struct zip_entry *out)
> > +{
> > +       size_t file_name_length = strlen(file_name);
> > +
> 
> no need for empty line, let's keep variable declarations in one block
> 
> > +       uint32_t i, offset = archive->cd_offset;
> > +
> > +       for (i = 0; i < archive->cd_records; ++i) {
> > +               struct central_directory_file_header *cdfh =
> > +                       cd_file_header_at_offset(archive, offset);
> > +               uint16_t cdfh_name_length, cdfh_flags;
> > +               const char *cdfh_name;
> > +
> > +               offset += sizeof(struct central_directory_file_header);
> 
> same, logically offset should be updated after we made sure we have
> cd_file_header
> 
> > +               if (!cdfh) {
> > +                       return -1;
> > +               }
> > +
> > +               cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length);
> > +               cdfh_name = check_access(archive, offset, cdfh_name_length);
> > +               if (!cdfh_name) {
> > +                       return -1;
> > +               }
> > +
> > +               cdfh_flags = unaligned_uint16_read(cdfh->flags);
> > +               if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
> > +                   (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
> > +                   file_name_length == cdfh_name_length &&
> > +                   memcmp(file_name, archive->data + offset, file_name_length) == 0) {
> > +                       return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
> > +                                                  out);
> > +               }
> > +
> > +               offset += cdfh_name_length;
> > +               offset += unaligned_uint16_read(cdfh->extra_field_length);
> > +               offset += unaligned_uint16_read(cdfh->file_comment_length);
> > +       }
> > +
> > +       return -1;
> > +}
> > diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
> > new file mode 100644
> > index 0000000..a9083f
> > --- /dev/null
> > +++ b/tools/lib/bpf/zip.h
> > @@ -0,0 +1,47 @@
> > +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> > +
> > +#ifndef __LIBBPF_ZIP_H
> > +#define __LIBBPF_ZIP_H
> > +
> > +#include <stdint.h>
> > +
> > +/* Represents an opened zip archive.
> 
> s/opened/open/
> 
> 
> > + * Only basic ZIP files are supported, in particular the following are not
> > + * supported:
> > + * - encryption
> > + * - streaming
> > + * - multi-part ZIP files
> > + * - ZIP64
> > + */
> > +struct zip_archive;
> > +
> > +/* Carries information on name, compression method, and data corresponding to a
> > + * file in a zip archive.
> > + */
> > +struct zip_entry {
> > +       /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
> > +       uint16_t compression;
> > +
> > +       /* Non-null terminated name of the file. */
> > +       const char *name;
> > +       /* Length of the file name. */
> > +       uint16_t name_length;
> > +
> > +       /* Pointer to the file data. */
> > +       const void *data;
> > +       /* Length of the file data. */
> > +       uint32_t data_length;
> > +       /* Offset of the file data within the archive. */
> > +       uint32_t data_offset;
> > +};
> > +
> > +/* Open a zip archive. Returns NULL in case of an error. */
> > +struct zip_archive *zip_archive_open(const char *path);
> > +
> > +/* Close a zip archive and release resources. */
> > +void zip_archive_close(struct zip_archive *archive);
> > +
> > +/* Look up an entry corresponding to a file in given zip archive. */
> > +int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
> > +
> > +#endif
> > --
> > 2.30.2
> >
diff mbox series

Patch

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 5a3dfb..b8b0a63 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@ 
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
 	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
-	    usdt.o
+	    usdt.o zip.o
diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
new file mode 100644
index 0000000..59ec79
--- /dev/null
+++ b/tools/lib/bpf/zip.c
@@ -0,0 +1,378 @@ 
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Routines for dealing with .zip archives.
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ */
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "zip.h"
+
+/* Specification of ZIP file format can be found here:
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * For a high level overview of the structure of a ZIP file see
+ * sections 4.3.1 - 4.3.6.
+ *
+ * Data structures appearing in ZIP files do not contain any
+ * padding and they might be misaligned. To allow us to safely
+ * operate on pointers to such structures and their members, without
+ * worrying of platform specific alignment issues, we define
+ * unaligned_uint16_t and unaligned_uint32_t types with no alignment
+ * requirements.
+ */
+typedef struct {
+	uint8_t raw[2];
+} unaligned_uint16_t;
+
+static uint16_t unaligned_uint16_read(unaligned_uint16_t value)
+{
+	uint16_t return_value;
+
+	memcpy(&return_value, value.raw, sizeof(return_value));
+	return return_value;
+}
+
+typedef struct {
+	uint8_t raw[4];
+} unaligned_uint32_t;
+
+static uint32_t unaligned_uint32_read(unaligned_uint32_t value)
+{
+	uint32_t return_value;
+
+	memcpy(&return_value, value.raw, sizeof(return_value));
+	return return_value;
+}
+
+#define END_OF_CD_RECORD_MAGIC 0x06054b50
+
+/* See section 4.3.16 of the spec. */
+struct end_of_central_directory_record {
+	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
+	unaligned_uint32_t magic;
+
+	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
+	 * Zip archive might span multiple files (disks).
+	 */
+	unaligned_uint16_t this_disk;
+
+	/* Number of the file containing the beginning of the central directory or
+	 * 0xFFFF if ZIP64 archive.
+	 */
+	unaligned_uint16_t cd_disk;
+
+	/* Number of central directory records on this disk or 0xFFFF if ZIP64
+	 * archive.
+	 */
+	unaligned_uint16_t cd_records;
+
+	/* Number of central directory records on all disks or 0xFFFF if ZIP64
+	 * archive.
+	 */
+	unaligned_uint16_t cd_records_total;
+
+	/* Size of the central directory recrod or 0xFFFFFFFF if ZIP64 archive. */
+	unaligned_uint32_t cd_size;
+
+	/* Offset of the central directory from the beginning of the archive or
+	 * 0xFFFFFFFF if ZIP64 archive.
+	 */
+	unaligned_uint32_t cd_offset;
+
+	/* Length of comment data following end of central driectory record. */
+	unaligned_uint16_t comment_length;
+
+	/* Up to 64k of arbitrary bytes. */
+	/* uint8_t comment[comment_length] */
+};
+
+#define CD_FILE_HEADER_MAGIC 0x02014b50
+#define FLAG_ENCRYPTED (1 << 0)
+#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
+
+/* See section 4.3.12 of the spec. */
+struct central_directory_file_header {
+	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
+	unaligned_uint32_t magic;
+	unaligned_uint16_t version;
+	/* Minimum zip version needed to extract the file. */
+	unaligned_uint16_t min_version;
+	unaligned_uint16_t flags;
+	unaligned_uint16_t compression;
+	unaligned_uint16_t last_modified_time;
+	unaligned_uint16_t last_modified_date;
+	unaligned_uint32_t crc;
+	unaligned_uint32_t compressed_size;
+	unaligned_uint32_t uncompressed_size;
+	unaligned_uint16_t file_name_length;
+	unaligned_uint16_t extra_field_length;
+	unaligned_uint16_t file_comment_length;
+	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
+	unaligned_uint16_t disk;
+	unaligned_uint16_t internal_attributes;
+	unaligned_uint32_t external_attributes;
+	/* Offset from the start of the disk containing the local file header to the
+	 * start of the local file header.
+	 */
+	unaligned_uint32_t offset;
+};
+
+#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
+
+/* See section 4.3.7 of the spec. */
+struct local_file_header {
+	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
+	unaligned_uint32_t magic;
+	/* Minimum zip version needed to extract the file. */
+	unaligned_uint16_t min_version;
+	unaligned_uint16_t flags;
+	unaligned_uint16_t compression;
+	unaligned_uint16_t last_modified_time;
+	unaligned_uint16_t last_modified_date;
+	unaligned_uint32_t crc;
+	unaligned_uint32_t compressed_size;
+	unaligned_uint32_t uncompressed_size;
+	unaligned_uint16_t file_name_length;
+	unaligned_uint16_t extra_field_length;
+};
+
+struct zip_archive {
+	void *data;
+	uint32_t size;
+	uint32_t cd_offset;
+	uint32_t cd_records;
+};
+
+static void *check_access(struct zip_archive *archive, uint32_t offset, uint32_t size)
+{
+	if (offset + size > archive->size || offset > offset + size) {
+		return NULL;
+	}
+	return archive->data + offset;
+}
+
+/* Returns 0 on success, -1 on error and -2 if the eocd indicates
+ * the archive uses features which are not supported.
+ */
+static int try_parse_end_of_central_directory(struct zip_archive *archive, uint32_t offset)
+{
+	struct end_of_central_directory_record *eocd =
+		check_access(archive, offset, sizeof(struct end_of_central_directory_record));
+	uint16_t comment_length, cd_records;
+	uint32_t cd_offset, cd_size;
+
+	if (!eocd || unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) {
+		return -1;
+	}
+
+	comment_length = unaligned_uint16_read(eocd->comment_length);
+	if (offset + sizeof(struct end_of_central_directory_record) + comment_length !=
+	    archive->size) {
+		return -1;
+	}
+
+	cd_records = unaligned_uint16_read(eocd->cd_records);
+	if (unaligned_uint16_read(eocd->this_disk) != 0 ||
+	    unaligned_uint16_read(eocd->cd_disk) != 0 ||
+	    unaligned_uint16_read(eocd->cd_records_total) != cd_records) {
+		/* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
+		return -2;
+	}
+
+	cd_offset = unaligned_uint32_read(eocd->cd_offset);
+	cd_size = unaligned_uint32_read(eocd->cd_size);
+	if (!check_access(archive, cd_offset, cd_size)) {
+		return -1;
+	}
+
+	archive->cd_offset = cd_offset;
+	archive->cd_records = cd_records;
+	return 0;
+}
+
+static int find_central_directory(struct zip_archive *archive)
+{
+	uint32_t offset;
+	int64_t limit;
+	int rc = -1;
+
+	if (archive->size <= sizeof(struct end_of_central_directory_record)) {
+		return -1;
+	}
+
+	/* Because the end of central directory ends with a variable length array of
+	 * up to 0xFFFF bytes we can't know exactly where it starts and need to
+	 * search for it at the end of the file, scanning the (limit, offset] range.
+	 */
+	offset = archive->size - sizeof(struct end_of_central_directory_record);
+	limit = (int64_t)offset - (1 << 16);
+
+	for (; offset >= 0 && offset > limit && rc == -1; offset--) {
+		rc = try_parse_end_of_central_directory(archive, offset);
+	}
+
+	return rc;
+}
+
+struct zip_archive *zip_archive_open(const char *path)
+{
+	struct zip_archive *archive;
+	int fd = open(path, O_RDONLY);
+	off_t size;
+	void *data;
+
+	if (fd < 0) {
+		return NULL;
+	}
+
+	size = lseek(fd, 0, SEEK_END);
+	if (size == (off_t)-1 || size > UINT32_MAX) {
+		close(fd);
+		return NULL;
+	}
+
+	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+
+	if (data == MAP_FAILED) {
+		return NULL;
+	}
+
+	archive = malloc(sizeof(struct zip_archive));
+	if (!archive) {
+		munmap(data, size);
+		return NULL;
+	};
+
+	archive->data = data;
+	archive->size = size;
+	if (find_central_directory(archive)) {
+		munmap(data, size);
+		free(archive);
+		archive = NULL;
+	}
+
+	return archive;
+}
+
+void zip_archive_close(struct zip_archive *archive)
+{
+	munmap(archive->data, archive->size);
+	free(archive);
+}
+
+static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
+							     uint32_t offset)
+{
+	struct local_file_header *lfh =
+		check_access(archive, offset, sizeof(struct local_file_header));
+	if (!lfh || unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) {
+		return NULL;
+	}
+	return lfh;
+}
+
+static int get_entry_at_offset(struct zip_archive *archive, uint32_t offset, struct zip_entry *out)
+{
+	struct local_file_header *lfh = local_file_header_at_offset(archive, offset);
+	uint16_t flags, name_length, extra_field_length;
+	uint32_t compressed_size;
+	const char *name;
+	void *data;
+
+	offset += sizeof(struct local_file_header);
+	if (!lfh) {
+		return -1;
+	};
+
+	flags = unaligned_uint16_read(lfh->flags);
+	if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) {
+		return -1;
+	}
+
+	name_length = unaligned_uint16_read(lfh->file_name_length);
+	name = check_access(archive, offset, name_length);
+	offset += name_length;
+	if (!name) {
+		return -1;
+	}
+
+	extra_field_length = unaligned_uint16_read(lfh->extra_field_length);
+	if (!check_access(archive, offset, extra_field_length)) {
+		return -1;
+	}
+	offset += extra_field_length;
+
+	compressed_size = unaligned_uint32_read(lfh->compressed_size);
+	data = check_access(archive, offset, compressed_size);
+	if (!data) {
+		return -1;
+	}
+
+	out->compression = unaligned_uint16_read(lfh->compression);
+	out->name_length = name_length;
+	out->name = name;
+	out->data = data;
+	out->data_length = compressed_size;
+	out->data_offset = offset;
+
+	return 0;
+}
+
+static struct central_directory_file_header *cd_file_header_at_offset(struct zip_archive *archive,
+								      uint32_t offset)
+{
+	struct central_directory_file_header *cdfh =
+		check_access(archive, offset, sizeof(struct central_directory_file_header));
+	if (!cdfh || unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) {
+		return NULL;
+	}
+	return cdfh;
+}
+
+int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
+			   struct zip_entry *out)
+{
+	size_t file_name_length = strlen(file_name);
+
+	uint32_t i, offset = archive->cd_offset;
+
+	for (i = 0; i < archive->cd_records; ++i) {
+		struct central_directory_file_header *cdfh =
+			cd_file_header_at_offset(archive, offset);
+		uint16_t cdfh_name_length, cdfh_flags;
+		const char *cdfh_name;
+
+		offset += sizeof(struct central_directory_file_header);
+		if (!cdfh) {
+			return -1;
+		}
+
+		cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length);
+		cdfh_name = check_access(archive, offset, cdfh_name_length);
+		if (!cdfh_name) {
+			return -1;
+		}
+
+		cdfh_flags = unaligned_uint16_read(cdfh->flags);
+		if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
+		    (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
+		    file_name_length == cdfh_name_length &&
+		    memcmp(file_name, archive->data + offset, file_name_length) == 0) {
+			return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
+						   out);
+		}
+
+		offset += cdfh_name_length;
+		offset += unaligned_uint16_read(cdfh->extra_field_length);
+		offset += unaligned_uint16_read(cdfh->file_comment_length);
+	}
+
+	return -1;
+}
diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
new file mode 100644
index 0000000..a9083f
--- /dev/null
+++ b/tools/lib/bpf/zip.h
@@ -0,0 +1,47 @@ 
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LIBBPF_ZIP_H
+#define __LIBBPF_ZIP_H
+
+#include <stdint.h>
+
+/* Represents an opened zip archive.
+ * Only basic ZIP files are supported, in particular the following are not
+ * supported:
+ * - encryption
+ * - streaming
+ * - multi-part ZIP files
+ * - ZIP64
+ */
+struct zip_archive;
+
+/* Carries information on name, compression method, and data corresponding to a
+ * file in a zip archive.
+ */
+struct zip_entry {
+	/* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
+	uint16_t compression;
+
+	/* Non-null terminated name of the file. */
+	const char *name;
+	/* Length of the file name. */
+	uint16_t name_length;
+
+	/* Pointer to the file data. */
+	const void *data;
+	/* Length of the file data. */
+	uint32_t data_length;
+	/* Offset of the file data within the archive. */
+	uint32_t data_offset;
+};
+
+/* Open a zip archive. Returns NULL in case of an error. */
+struct zip_archive *zip_archive_open(const char *path);
+
+/* Close a zip archive and release resources. */
+void zip_archive_close(struct zip_archive *archive);
+
+/* Look up an entry corresponding to a file in given zip archive. */
+int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
+
+#endif