diff mbox series

trace-cmd library: Add ZSTD support.

Message ID 20220219230151.56282-2-sebastian@breakpoint.cc (mailing list archive)
State Superseded
Headers show
Series trace-cmd library: Add ZSTD support. | expand

Commit Message

Sebastian Andrzej Siewior Feb. 19, 2022, 11:01 p.m. UTC
The zstd support is using the context aware function so there is no need
for the library to allocate one (including the memory) for every
invocation. This requires to be used in a single threaded environment or
the API needs to be extended to pass the context parameter.

In most cases the input buffer was 40KiB so it does not make sense to
use higher compression levels. Higher compression levels won't
significantly improve the compression ratio given that every 40KiB
block is independent. However higher compression levels will slow down
the compression process.

The upper level stores 4 bytes compressed and decompressed size. In
order to not save the decompressed size twice, the library won't store
the size in each compressed block. This shrinks the frame header a
little. In theory the ZSTD-magic (4 bytes) could be stripped away but
that is a little complicated and the 4 bytes shouldn't hurt. We could even
enable check summing to be sure that the compressed block wasn't
accidentally tampered with.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
---
 Makefile                                | 12 ++++
 lib/trace-cmd/Makefile                  |  5 +-
 lib/trace-cmd/include/trace-cmd-local.h |  4 ++
 lib/trace-cmd/trace-compress-zstd.c     | 91 +++++++++++++++++++++++++
 lib/trace-cmd/trace-compress.c          |  3 +
 tracecmd/Makefile                       |  2 +-
 6 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 lib/trace-cmd/trace-compress-zstd.c

Comments

Tzvetomir Stoyanov (VMware) Feb. 21, 2022, 6:08 a.m. UTC | #1
On Sun, Feb 20, 2022 at 1:02 AM Sebastian Andrzej Siewior
<sebastian@breakpoint.cc> wrote:
>

Hi Sebastian,
Thank you for contributing zstd support for trace-cmd! The zlib was
chosen for the PoC implementation of the trace file compression
support, as it is one of the widely available compression libraries.
But as you have already seen, the design allows easily adding multiple
compression algorithms. Support for a new compression library is less
than 100 lines of code, that's why I suggest to keep the zlib support
- even though the default will be zstd.

> The zstd support is using the context aware function so there is no need
> to for the library to allocate one (including the memory) for every
> invocation. This requires to be used in a single threaded environment or
> the API needs to be extended to pass the context parameter.

Good point, the current design is according to zlib - that's why there
is no context. But I like the idea to have a library specific context
and this should be added now, before the first release.

>
> In most cases the input buffer was 40KiB so it does not make sense to
> use higher compression levels. Higher compression levels won't
> significantly improve the compression ration given that the every 40KiB
> block is independent. However higher compression levels will slow down
> the compression process.
>

By default, the buffer is 10 system pages - so it could be larger on
some systems.

> The upper level stores 4 bytes compressed and decompressed size. In
> order to not save the decompressed size twice, the library won't store
> the store in each compressed block. This shrinks the frame header a
> little. In theory the ZSTD-magic (4 bytes) could be stripped away but
> that is little complicated and the 4 bytes shouldn't hurt. We could even
> enable check summing to be sure that the compressed block wasn't
> accidentally tampered.
>
> Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
> ---
>  Makefile                                | 12 ++++
>  lib/trace-cmd/Makefile                  |  5 +-
>  lib/trace-cmd/include/trace-cmd-local.h |  4 ++
>  lib/trace-cmd/trace-compress-zstd.c     | 91 +++++++++++++++++++++++++
>  lib/trace-cmd/trace-compress.c          |  3 +
>  tracecmd/Makefile                       |  2 +-
>  6 files changed, 115 insertions(+), 2 deletions(-)
>  create mode 100644 lib/trace-cmd/trace-compress-zstd.c
>
> diff --git a/Makefile b/Makefile
> index f5c2cdb894f9a..109cbeb29002a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -307,6 +307,18 @@ CFLAGS += -DHAVE_ZLIB
>  $(info    Have zlib compression support)
>  endif
>
> +TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y")
> +
> +ifeq ("$(TEST_LIBZSTD)", "y")
> +LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd")
> +LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd")
> +CFLAGS += -DHAVE_ZSTD
> +ZSTD_INSTALLED=1
> +$(info    Have ZSTD compression support)
> +endif
> +
> +export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED
> +
>  CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
>  export CUNIT_INSTALLED
>
> diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile
> index 1820c67b48474..da0ad4deeb4f0 100644
> --- a/lib/trace-cmd/Makefile
> +++ b/lib/trace-cmd/Makefile
> @@ -29,6 +29,9 @@ OBJS += trace-compress.o
>  ifeq ($(ZLIB_INSTALLED), 1)
>  OBJS += trace-compress-zlib.o
>  endif
> +ifeq ($(ZSTD_INSTALLED), 1)
> +OBJS += trace-compress-zstd.o
> +endif
>
>  # Additional util objects
>  OBJS += trace-blk-hack.o
> @@ -48,7 +51,7 @@ $(DEPS): | $(bdir)
>  $(LIBTRACECMD_STATIC): $(OBJS)
>         $(Q)$(call do_build_static_lib)
>
> -LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) -lpthread
> +LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread
>
>  ifeq ($(ZLIB_INSTALLED), 1)
>  LIBS += -lz
> diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h
> index 48f179d6f524a..8601a15f86f22 100644
> --- a/lib/trace-cmd/include/trace-cmd-local.h
> +++ b/lib/trace-cmd/include/trace-cmd-local.h
> @@ -30,6 +30,10 @@ void tracecmd_info(const char *fmt, ...);
>  int tracecmd_zlib_init(void);
>  #endif
>
> +#ifdef HAVE_ZLIB

I think you mean HAVE_ZSTD here.

> +int tracecmd_zstd_init(void);
> +#endif
> +
>  struct data_file_write {
>         unsigned long long      file_size;
>         unsigned long long      write_size;
> diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c
> new file mode 100644
> index 0000000000000..fc5e350f32509
> --- /dev/null
> +++ b/lib/trace-cmd/trace-compress-zstd.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +/*
> + * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
> + *
> + */
> +#include <stdlib.h>
> +#include <zstd.h>
> +#include <errno.h>
> +
> +#include "trace-cmd-private.h"
> +
> +#define __ZSTD_NAME            "zstd"
> +#define __ZSTD_WEIGTH          5
> +
> +static ZSTD_CCtx *ctx_c;
> +static ZSTD_DCtx *ctx_d;
> +
> +static int zstd_compress(const char *in, unsigned int in_bytes,
> +                        char *out, unsigned int *out_bytes)
> +{
> +       size_t ret;
> +
> +       ret = ZSTD_compress2(ctx_c, out, *out_bytes, in, in_bytes);
> +       if (ZSTD_isError(ret))
> +               return -1;
> +       *out_bytes = ret;
> +       return 0;
> +}
> +
> +static int zstd_decompress(const char *in, unsigned int in_bytes,
> +                          char *out, unsigned int *out_bytes)
> +{
> +       size_t ret;
> +
> +       ret = ZSTD_decompressDCtx(ctx_d, out, *out_bytes, in, in_bytes);
> +       if (ZSTD_isError(ret)) {
> +               errno = -EINVAL;
> +               return -1;
> +       }
> +       *out_bytes = ret;
> +       errno = 0;
> +       return 0;
> +}
> +
> +static unsigned int zstd_compress_bound(unsigned int in_bytes)
> +{
> +       return ZSTD_compressBound(in_bytes);
> +}
> +
> +static bool zstd_is_supported(const char *name, const char *version)
> +{
> +       if (!name)
> +               return false;
> +       if (strcmp(name, __ZSTD_NAME))
> +               return false;
> +
> +       return true;
> +}
> +
> +int tracecmd_zstd_init(void)
> +{
> +       int ret = 0;
> +       size_t r;
> +
> +       ctx_c = ZSTD_createCCtx();
> +       ctx_d = ZSTD_createDCtx();
> +       if (!ctx_c || !ctx_d)
> +               goto err;
> +
> +       r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0);
> +       if (ZSTD_isError(r))
> +               goto err;
> +
> +       ret = tracecmd_compress_proto_register(__ZSTD_NAME,
> +                                              ZSTD_versionString(),
> +                                              __ZSTD_WEIGTH,
> +                                              zstd_compress,
> +                                              zstd_decompress,
> +                                              zstd_compress_bound,
> +                                              zstd_is_supported);

I think tracecmd_compress_proto_register() should be extended with two new hooks:
    void *(*new_context)(void),
    void (*free_context)(void *),
and ctx_c,  ctx_d should be allocated and freed there. Currently, that
logic in trace-cmd is in a single thread. That may change in the
future, and then the current design will be a limitation.

> +       if (!ret)
> +               return 0;
> +err:
> +       ZSTD_freeCCtx(ctx_c);
> +       ZSTD_freeDCtx(ctx_d);
> +       ctx_c = NULL;
> +       ctx_d = NULL;
> +       if (ret < 0)
> +               return ret;
> +       return -1;
> +}
> diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c
> index 210d58b602577..a14155a321f45 100644
> --- a/lib/trace-cmd/trace-compress.c
> +++ b/lib/trace-cmd/trace-compress.c
> @@ -390,6 +390,9 @@ void tracecmd_compress_init(void)
>  #ifdef HAVE_ZLIB
>         tracecmd_zlib_init();
>  #endif
> +#ifdef HAVE_ZSTD
> +       tracecmd_zstd_init();
> +#endif
>  }
>
>  static struct compress_proto *compress_proto_select(void)
> diff --git a/tracecmd/Makefile b/tracecmd/Makefile
> index 56742f0afa2f8..355f04723ad7c 100644
> --- a/tracecmd/Makefile
> +++ b/tracecmd/Makefile
> @@ -49,7 +49,7 @@ all_objs := $(sort $(ALL_OBJS))
>  all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d)
>
>  CONFIG_INCLUDES =
> -CONFIG_LIBS    = -lrt -lpthread $(TRACE_LIBS)
> +CONFIG_LIBS    = -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS)
>  CONFIG_FLAGS   =
>
>  ifeq ($(ZLIB_INSTALLED), 1)
> --
> 2.35.1
>
Sebastian Andrzej Siewior Feb. 21, 2022, 1:54 p.m. UTC | #2
On 2022-02-21 08:08:51 [+0200], Tzvetomir Stoyanov wrote:
> On Sun, Feb 20, 2022 at 1:02 AM Sebastian Andrzej Siewior
> <sebastian@breakpoint.cc> wrote:
> >
> 
> Hi Sebastian,
Hi Tzvetomir,

> Thank you for contributing zstd support for trace-cmd! The zlib was
> chosen for the PoC implementation of the trace file compression
> support, as it is one of the widely available compression libraries.
> But as you have already seen, the design allows easily adding multiple
> compression algorithms. Support for a new compression library is less
> than 100 lines of code, that's why I suggest to keep the zlib support
> - even though the default will be zstd.

Oki.

> > The zstd support is using the context aware function so there is no need
> > to for the library to allocate one (including the memory) for every
> > invocation. This requires to be used in a single threaded environment or
> > the API needs to be extended to pass the context parameter.
> 
> Good point, the current design is according to zlib - that's why there
> is no context. But I like the idea to have a library specific context
> and this should be added now, before the first release.

cool.

> >
> > In most cases the input buffer was 40KiB so it does not make sense to
> > use higher compression levels. Higher compression levels won't
> > significantly improve the compression ration given that the every 40KiB
> > block is independent. However higher compression levels will slow down
> > the compression process.
> >
> 
> By default, the buffer is 10 system pages - so it could be larger on
> some systems.

Just a hint that if we increase the buffer size then the compression
would benefit from it ;)

> > diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h
> > index 48f179d6f524a..8601a15f86f22 100644
> > --- a/lib/trace-cmd/include/trace-cmd-local.h
> > +++ b/lib/trace-cmd/include/trace-cmd-local.h
> > @@ -30,6 +30,10 @@ void tracecmd_info(const char *fmt, ...);
> >  int tracecmd_zlib_init(void);
> >  #endif
> >
> > +#ifdef HAVE_ZLIB
> 
> I think you mean HAVE_ZSTD here.

Yeah, fixed in v2, thanks.

> > diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c
> > new file mode 100644
> > index 0000000000000..fc5e350f32509
> > --- /dev/null
> > +++ b/lib/trace-cmd/trace-compress-zstd.c> > +int tracecmd_zstd_init(void)
> > +{
> > +       int ret = 0;
> > +       size_t r;
> > +
> > +       ctx_c = ZSTD_createCCtx();
> > +       ctx_d = ZSTD_createDCtx();
> > +       if (!ctx_c || !ctx_d)
> > +               goto err;
> > +
> > +       r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0);
> > +       if (ZSTD_isError(r))
> > +               goto err;
> > +
> > +       ret = tracecmd_compress_proto_register(__ZSTD_NAME,
> > +                                              ZSTD_versionString(),
> > +                                              __ZSTD_WEIGTH,
> > +                                              zstd_compress,
> > +                                              zstd_decompress,
> > +                                              zstd_compress_bound,
> > +                                              zstd_is_supported);
> 
> I think tracecmd_compress_proto_register() should be extend  with two new hooks:
>     void *(*new_context)(void),
>     void (*free_context)(void *),
> and ctx_c,  ctx_d should be allocated and freed there. Currently, that
> logic in trace-cmd is in a single thread. That may change in the
> future, and then the current design will be a limitation.

I would suggest to use

struct tracecmd_compress_algo {
	const char *name,
	const char *version,
	int weight,
	func_t *compress;
	func_t *decompress;
	func_t *bound;
	func_t *supported;
};

and then
	ret = tracecmd_compress_proto_register(&comp_algo);

so that the function has less args and can be extended without touching
every algo.

As for func_t / calling convention I would prefer something that is not
that close to zlib. Like

	int comp(void *ctx, const void *src, signed int in_size,
		 void *dst, signed int out_size);

This clearly limits the sizes to 31 bit (with the 40kiB probably okay)
and the return value can be either >= 0 returning the number of bytes
produced or negative for an error. And please no errno touching ;)

Sebastian
Tzvetomir Stoyanov (VMware) Feb. 22, 2022, 3:42 a.m. UTC | #3
On Mon, Feb 21, 2022 at 3:55 PM Sebastian Andrzej Siewior
<sebastian@breakpoint.cc> wrote:
>
[ ... ]
> > > +int tracecmd_zstd_init(void)
> > > +{
> > > +       int ret = 0;
> > > +       size_t r;
> > > +
> > > +       ctx_c = ZSTD_createCCtx();
> > > +       ctx_d = ZSTD_createDCtx();
> > > +       if (!ctx_c || !ctx_d)
> > > +               goto err;
> > > +
> > > +       r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0);
> > > +       if (ZSTD_isError(r))
> > > +               goto err;
> > > +
> > > +       ret = tracecmd_compress_proto_register(__ZSTD_NAME,
> > > +                                              ZSTD_versionString(),
> > > +                                              __ZSTD_WEIGTH,
> > > +                                              zstd_compress,
> > > +                                              zstd_decompress,
> > > +                                              zstd_compress_bound,
> > > +                                              zstd_is_supported);
> >
> > I think tracecmd_compress_proto_register() should be extend  with two new hooks:
> >     void *(*new_context)(void),
> >     void (*free_context)(void *),
> > and ctx_c,  ctx_d should be allocated and freed there. Currently, that
> > logic in trace-cmd is in a single thread. That may change in the
> > future, and then the current design will be a limitation.
>
> I would suggest to use
>
> struct tracecmd_compress_algo {
>         const char *name,
>         const char *version,
>         int weight,
>         func_t *compress;
>         func_t *decompress;
>         func_t *bound;
>         func_t *supported;
> };
>
> and then
>         ret = tracecmd_compress_proto_register(&comp_algo);
>
> so that the function has less args and can be extended without touching
> every algo.

We prefer to use explicit API arguments, to ensure that on API's
change the old callers of the API will be updated. Using such
structure is more flexible, but even though in most cases the change
will be backward compatible - it could be hard to debug the legacy
code in case the API's behaviour changes because of adding some new
parameter in the structure.

>
> As for func_t / calling convetion I would prefer something that is not
> that close to zlib. Like
>
>         int comp(void *ctx, const void *src, signed int in_size,
>                  void *dst, signed int out_size);
>
> This clearly limits the sizes to 31 bit (with the 40kiB probably okay)
> and the return value can be either >= 0 returning the number of bytes
> produced or negative for an error. And please no errno touching ;)
>

I like the idea to make these hooks more generic, not so close to a
specific library. But there should be some error reporting mechanism,
why not to use errno ? In the best case, the library itself should set
the errno. But zlib has its own error codes, and according to its
documentation not all of the APIs set errno in case of an error.
That's why I added this explicit setting of errno in case of some
Z_ERROR.

Are you interested to submit a patch with these suggestions ? I also
can do it, when Steven merges zstd support and before the next
trace-cmd release.

> Sebastian

--
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center
Steven Rostedt Feb. 22, 2022, 3:53 a.m. UTC | #4
On Tue, 22 Feb 2022 05:42:56 +0200
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:
> > >
> > > I think tracecmd_compress_proto_register() should be extend  with
> > > two new hooks: void *(*new_context)(void),
> > >     void (*free_context)(void *),
> > > and ctx_c,  ctx_d should be allocated and freed there. Currently,
> > > that logic in trace-cmd is in a single thread. That may change in
> > > the future, and then the current design will be a limitation.  
> >
> > I would suggest to use
> >
> > struct tracecmd_compress_algo {
> >         const char *name,
> >         const char *version,
> >         int weight,
> >         func_t *compress;
> >         func_t *decompress;
> >         func_t *bound;
> >         func_t *supported;
> > };
> >
> > and then
> >         ret = tracecmd_compress_proto_register(&comp_algo);
> >
> > so that the function has less args and can be extended without
> > touching every algo.  
> 
> We prefer to use explicit API arguments, to ensure that on API's
> change the old callers of the API will be updated. Using such
> structure is more flexible, but even though in most cases the change
> will be backward compatible - it could be hard to debug the legacy
> code in case the API's behaviour changes because of adding some new
> parameter in the structure.

The way this is done in the kernel interface (aka. system calls) is to
pass a structure along with the size. That is, the API will know which
version of the API is being called by the size of the structure.

Anything new will have to be appended to the structure, which will
increase it. If the library receives a structure of a smaller size, but
one that was once supported, it could use that information to just do
the old method. In other words, only use the fields of a structure that
fit into the size given.

> 
> >
> > As for func_t / calling convetion I would prefer something that is
> > not that close to zlib. Like
> >
> >         int comp(void *ctx, const void *src, signed int in_size,
> >                  void *dst, signed int out_size);
> >
> > This clearly limits the sizes to 31 bit (with the 40kiB probably
> > okay) and the return value can be either >= 0 returning the number
> > of bytes produced or negative for an error. And please no errno
> > touching ;) 

Oh, we are always touching errno ;-)

> 
> I like the idea to make these hooks more generic, not so close to a
> specific library. But there should be some error reporting mechanism,
> why not to use errno ? In the best case, the library itself should set
> the errno. But zlib has its own error codes, and according to its
> documentation not all of the APIs set errno in case of an error.
> That's why I added this explicit setting of errno in case of some
> Z_ERROR.
> 
> Are you interested to submit a patch with these suggestions ? I also
> can do it, when Steven merges zstd support and before the next
> trace-cmd release.

I'm hoping to get everything merged this week, but probably will not
release trace-cmd until next week. I'm still sorting out the libraries.
But I'm hoping that they are almost done.

-- Steve
Sebastian Andrzej Siewior Feb. 22, 2022, 9:06 p.m. UTC | #5
On 2022-02-22 05:42:56 [+0200], Tzvetomir Stoyanov wrote:
> We prefer to use explicit API arguments, to ensure that on API's
> change the old callers of the API will be updated. Using such
> structure is more flexible, but even though in most cases the change
> will be backward compatible - it could be hard to debug the legacy
> code in case the API's behaviour changes because of adding some new
> parameter in the structure.

It depends. xz, openssl, … have ABIs and structs which change. I assumed
that struct is internal to trace-cmd. But even if it isn't and it is
public then it is part of lib-trace-cmd's ABI. Once you change something
here you need to increment your so number and this requires recompiling
all downstream users, aka a library transition.
By using C99 initializers you ensure that removed / replaced struct
members don't remain used.

> > As for func_t / calling convetion I would prefer something that is not
> > that close to zlib. Like
> >
> >         int comp(void *ctx, const void *src, signed int in_size,
> >                  void *dst, signed int out_size);
> >
> > This clearly limits the sizes to 31 bit (with the 40kiB probably okay)
> > and the return value can be either >= 0 returning the number of bytes
> > produced or negative for an error. And please no errno touching ;)
> >
> 
> I like the idea to make these hooks more generic, not so close to a
> specific library. 

You reassmeble the zlib thing. Here you have comp/decomp functions
with pointer and size. And instead updating dst' size you have a return
value defined as
 >= 0: decompressed / compressed to
  < 0: error number defined by trace-cmd

> But there should be some error reporting mechanism,
> why not to use errno ? In the best case, the library itself should set
> the errno. 

But why touching errno? errno is defined / used by the OS. Based on the
architecture the kernel returns two values. If sys_open succeeds then it
returns the file handle. On failure it returns -1 and errno is set to the
actual error. Some architectures use two registers (one -1 and the other
positive errno) for that, some use one (positive or negative errno). The
C-library then knows what the architecture does and ensures the API
stays the same.
A library like yours should not touch errno. If you encounter an error,
you should define your own error numbers and use those. So your
compression library detects a checksum error during decompression what
do you do? EINVAL because the input is invalid? EBADMSG because the
input is a bad message? But it is defined as "Not a data message" so
maybe not. Maybe EILSEQ because the data sequence is invalid. So you
basically need to guess and pick something that is close to what you have.
While you could define your own codes and return those.

> But zlib has its own error codes, and according to its
> documentation not all of the APIs set errno in case of an error.
> That's why I added this explicit setting of errno in case of some
> Z_ERROR.

But I thought you wanted something generic and not close to a specific
library :)
zlib is not a good example in terms of an API. No matter if you look at
zstd or xz _or_ something else recent (as in not from the 80s). Both the
compression libs I mentioned perform explicit framing and you as the
user know when something ends. zlib is a little difficult sometimes. You
do have the luxury that you know your input. I remember rsync does some
things to be sure if there is an error or the input is actually over…

> Are you interested to submit a patch with these suggestions ? I also
> can do it, when Steven merges zstd support and before the next
> trace-cmd release.

Well if nobody objects, nobody does it first then maybe as I can't
promise that I can make time before the weekend (and the weekend is
currently full).

Sebastian
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index f5c2cdb894f9a..109cbeb29002a 100644
--- a/Makefile
+++ b/Makefile
@@ -307,6 +307,18 @@  CFLAGS += -DHAVE_ZLIB
 $(info    Have zlib compression support)
 endif
 
+TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y")
+
+ifeq ("$(TEST_LIBZSTD)", "y")
+LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd")
+LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd")
+CFLAGS += -DHAVE_ZSTD
+ZSTD_INSTALLED=1
+$(info    Have ZSTD compression support)
+endif
+
+export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED
+
 CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
 export CUNIT_INSTALLED
 
diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile
index 1820c67b48474..da0ad4deeb4f0 100644
--- a/lib/trace-cmd/Makefile
+++ b/lib/trace-cmd/Makefile
@@ -29,6 +29,9 @@  OBJS += trace-compress.o
 ifeq ($(ZLIB_INSTALLED), 1)
 OBJS += trace-compress-zlib.o
 endif
+ifeq ($(ZSTD_INSTALLED), 1)
+OBJS += trace-compress-zstd.o
+endif
 
 # Additional util objects
 OBJS += trace-blk-hack.o
@@ -48,7 +51,7 @@  $(DEPS): | $(bdir)
 $(LIBTRACECMD_STATIC): $(OBJS)
 	$(Q)$(call do_build_static_lib)
 
-LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) -lpthread
+LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread
 
 ifeq ($(ZLIB_INSTALLED), 1)
 LIBS += -lz
diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h
index 48f179d6f524a..8601a15f86f22 100644
--- a/lib/trace-cmd/include/trace-cmd-local.h
+++ b/lib/trace-cmd/include/trace-cmd-local.h
@@ -30,6 +30,10 @@  void tracecmd_info(const char *fmt, ...);
 int tracecmd_zlib_init(void);
 #endif
 
+#ifdef HAVE_ZLIB
+int tracecmd_zstd_init(void);
+#endif
+
 struct data_file_write {
 	unsigned long long	file_size;
 	unsigned long long	write_size;
diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c
new file mode 100644
index 0000000000000..fc5e350f32509
--- /dev/null
+++ b/lib/trace-cmd/trace-compress-zstd.c
@@ -0,0 +1,91 @@ 
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+ *
+ */
+#include <stdlib.h>
+#include <zstd.h>
+#include <errno.h>
+
+#include "trace-cmd-private.h"
+
+#define __ZSTD_NAME		"zstd"
+#define __ZSTD_WEIGTH		5
+
+static ZSTD_CCtx *ctx_c;
+static ZSTD_DCtx *ctx_d;
+
+static int zstd_compress(const char *in, unsigned int in_bytes,
+			 char *out, unsigned int *out_bytes)
+{
+	size_t ret;
+
+	ret = ZSTD_compress2(ctx_c, out, *out_bytes, in, in_bytes);
+	if (ZSTD_isError(ret))
+		return -1;
+	*out_bytes = ret;
+	return 0;
+}
+
+static int zstd_decompress(const char *in, unsigned int in_bytes,
+			   char *out, unsigned int *out_bytes)
+{
+	size_t ret;
+
+	ret = ZSTD_decompressDCtx(ctx_d, out, *out_bytes, in, in_bytes);
+	if (ZSTD_isError(ret)) {
+		errno = -EINVAL;
+		return -1;
+	}
+	*out_bytes = ret;
+	errno = 0;
+	return 0;
+}
+
+static unsigned int zstd_compress_bound(unsigned int in_bytes)
+{
+	return ZSTD_compressBound(in_bytes);
+}
+
+static bool zstd_is_supported(const char *name, const char *version)
+{
+	if (!name)
+		return false;
+	if (strcmp(name, __ZSTD_NAME))
+		return false;
+
+	return true;
+}
+
+int tracecmd_zstd_init(void)
+{
+	int ret = 0;
+	size_t r;
+
+	ctx_c = ZSTD_createCCtx();
+	ctx_d = ZSTD_createDCtx();
+	if (!ctx_c || !ctx_d)
+		goto err;
+
+	r = ZSTD_CCtx_setParameter(ctx_c, ZSTD_c_contentSizeFlag, 0);
+	if (ZSTD_isError(r))
+		goto err;
+
+	ret = tracecmd_compress_proto_register(__ZSTD_NAME,
+					       ZSTD_versionString(),
+					       __ZSTD_WEIGTH,
+					       zstd_compress,
+					       zstd_decompress,
+					       zstd_compress_bound,
+					       zstd_is_supported);
+	if (!ret)
+		return 0;
+err:
+	ZSTD_freeCCtx(ctx_c);
+	ZSTD_freeDCtx(ctx_d);
+	ctx_c = NULL;
+	ctx_d = NULL;
+	if (ret < 0)
+		return ret;
+	return -1;
+}
diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c
index 210d58b602577..a14155a321f45 100644
--- a/lib/trace-cmd/trace-compress.c
+++ b/lib/trace-cmd/trace-compress.c
@@ -390,6 +390,9 @@  void tracecmd_compress_init(void)
 #ifdef HAVE_ZLIB
 	tracecmd_zlib_init();
 #endif
+#ifdef HAVE_ZSTD
+	tracecmd_zstd_init();
+#endif
 }
 
 static struct compress_proto *compress_proto_select(void)
diff --git a/tracecmd/Makefile b/tracecmd/Makefile
index 56742f0afa2f8..355f04723ad7c 100644
--- a/tracecmd/Makefile
+++ b/tracecmd/Makefile
@@ -49,7 +49,7 @@  all_objs := $(sort $(ALL_OBJS))
 all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d)
 
 CONFIG_INCLUDES =
-CONFIG_LIBS	= -lrt -lpthread $(TRACE_LIBS)
+CONFIG_LIBS	= -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS)
 CONFIG_FLAGS	=
 
 ifeq ($(ZLIB_INSTALLED), 1)