diff mbox series

[2/5] histograms: traceeval initialize

Message ID 20230728190515.23088-2-stevie.6strings@gmail.com (mailing list archive)
State Superseded
Headers show
Series [1/5] histograms: initial histograms interface | expand

Commit Message

Stevie Alvarez July 28, 2023, 7:04 p.m. UTC
From: "Stevie Alvarez (Google)" <stevie.6strings@gmail.com>

traceeval_init() creates a new struct traceeval instance with regards
to the struct traceeval_type array arguments keys and vals. These arrays
define the structure of the histogram, with each describing the expected
structure of inserted arrays of union traceeval_data. The keys and vals
arguments are copied on the heap to ensure that the struct traceeval
instance has access to the definition regardless of how the user
initialized keys and vals.

Signed-off-by: Stevie Alvarez (Google) <stevie.6strings@gmail.com>
---
 Makefile         |   2 +-
 src/Makefile     |   1 +
 src/histograms.c | 285 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 src/histograms.c

Comments

Steven Rostedt July 28, 2023, 10:03 p.m. UTC | #1
On Fri, 28 Jul 2023 15:04:37 -0400
Stevie Alvarez <stevie.6strings@gmail.com> wrote:

> From: "Stevie Alvarez (Google)" <stevie.6strings@gmail.com>
> 
> traceeval_init() creates a new struct traceeval instance with regards
> to the struct traceeval_type array arguments keys and vals. These arrays
> define the structure of the histogram, with each describing the expected
> structure of inserted arrays of union traceeval_data. The keys and vals
> arguments are copied on the heap to ensure that the struct traceeval
> instance has access to the definition regardless of how the user
> initialized keys and vals.
> 
> Signed-off-by: Stevie Alvarez (Google) <stevie.6strings@gmail.com>
> ---
>  Makefile         |   2 +-
>  src/Makefile     |   1 +
>  src/histograms.c | 285 +++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 287 insertions(+), 1 deletion(-)
>  create mode 100644 src/histograms.c
> 
> diff --git a/Makefile b/Makefile
> index 4a24d5a..3ea051c 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -172,7 +172,7 @@ libs: $(LIBRARY_A) $(LIBRARY_SO)
>  
>  VALGRIND = $(shell which valgrind)
>  UTEST_DIR = utest
> -UTEST_BINARY = trace-utest
> +UTEST_BINARY = eval-utest
>  
>  test: force $(LIBRARY_STATIC)
>  ifneq ($(CUNIT_INSTALLED),1)
> diff --git a/src/Makefile b/src/Makefile
> index b4b6e52..b32a389 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -4,6 +4,7 @@ include $(src)/scripts/utils.mk
>  
>  OBJS =
>  OBJS += trace-analysis.o
> +OBJS += histograms.o
>  
>  OBJS := $(OBJS:%.o=$(bdir)/%.o)
>  
> diff --git a/src/histograms.c b/src/histograms.c
> new file mode 100644
> index 0000000..13830e4
> --- /dev/null
> +++ b/src/histograms.c
> @@ -0,0 +1,285 @@
> +
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * libtraceeval histogram interface implementation.
> + *
> + * Copyright (C) 2023 Google Inc, Stevie Alvarez <stevie.6strings@gmail.com>
> + */
> +
> +#include <histograms.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <stdarg.h>

Nit, I find it cleaner to have an upside down x-mas tree style of
header includes. This is more of a guideline than anything else.

#include <stdbool.h>
#include <string.h>
#include <strarg.h>
#include <stdio.h>

Just looks cleaner.

Oh, and for the header that is specific for this library, that goes
afterward. Sometimes with a space between it and the other includes.

#include <histograms.h>

And since "histograms" is such a generic term, and there may actually
be a library that has that name, let's make it:

#include <traceeval-hist.h>

for now (it may replace the normal one later).


> +
> +/**
> + * Iterate over @keys, which should be an array of struct traceeval_type's,
> + * until reaching an instance of type TRACEEVAL_TYPE_NONE.
> + * @i should be a declared integer type.
> + */
> +#define for_each_key(i, keys)	\
> +	for (i = 0; (keys)[(i)].type != TRACEEVAL_TYPE_NONE; (i)++)

Nit, the brackets in [(i)] make the parenthesis unneeded. That is, [i]
should always work. But I may be wrong ;-) I just can't think of
something that could make it fail.

> +
> +/** A key-value pair */

Again, just use "/* " and not "/** "

> +struct entry {
> +	union traceeval_data	*keys;
> +	union traceeval_data	*vals;
> +};
> +
> +/** A table of key-value entries */
> +struct hist_table {
> +	struct entry	*map;
> +	size_t		nr_entries;
> +};
> +
> +/** Histogram */
> +struct traceeval {
> +	struct traceeval_type		*def_keys;
> +	struct traceeval_type		*def_vals;

The "def" here keeps making me think it's "default" when it is
"defined". Let's call it "key_types" and "val_types".

That will make it much more obvious to what they are.

Also, I think it's a good idea that we keep track of the size. And then
we don't even need to keep the NONE around.

	size_t				nr_key_types;
	size_t				nr_val_types;

> +	struct hist_table		*hist;
> +};
> +
> +/** Iterate over results of histogram */
> +struct traceeval_iterator {};  // TODO

Looking at this series, it's better to not include these TODOs, and
just add the functions in the later patches.

If a function you add calls one of these function, then you can add the
stub functions. But you don't need to add the stubs if nothing is
referencing them.

> +
> +/**

If you use the kerneldoc comment format, do all the real formatting. That is:

/**
 * function_name - one line desrciption
 * @parameter1: Describe parameter 1
 * @parameter2: Describe parameter 2
 *
 * Description of the body.
 *
 * If it returns a value, say what it returns an the end.
 */

> + * Print error message.
> + * Additional arguments are used with respect to fmt.
> + */
> +static void print_err(const char *fmt, ...)
> +{
> +	va_list ap;
> +
> +	va_start(ap, fmt);
> +	vfprintf(stderr, fmt, ap);
> +	va_end(ap);
> +	fprintf(stderr, "\n");
> +}
> +
> +// TODO
> +int traceeval_compare(struct traceeval *orig, struct traceeval *copy)
> +{
> +	return -1;
> +}
> +
> +/**

Not, static functions do not need to have the kernel doc layout (unless
you want to write one). They can just be a brief description. But then
use "/* " and not "/** ".

> + * Resize a struct traceeval_type array to a size of @size + 1.
> + *
> + * Returns a pointer to the resized array, or NULL if the provided pointer was
> + * freed to due lack of memory.

I would stress that this frees @defs.

 * Returns the pointer to the resized array or NULL if it failed to
 * allocate. On failure, it will free @defs and its contents.

> + */
> +static struct traceeval_type *type_realloc(struct traceeval_type *defs,
> +		size_t size)
> +{
> +	struct traceeval_type *tmp_defs = NULL;

Do not need to initialize it to NULL if you assign it right afterward.

Add a space between the declarations and the code.

> +	tmp_defs = realloc(defs,
> +			(size + 1) * sizeof(struct traceeval_type));
> +	if (!tmp_defs)
> +		goto fail_type_realloc;
> +	return tmp_defs;

Since there's only one error condition and it's right before the
return, remove the label and just do:

	if (tmp_defs)
		return tmp_defs;

	/* Failure, clean up and free defs */

Although, to be safe, I think we should make sure that the newly
allocated memory is zeroed out.

	if (tmp_defs) {
		memset(&tmp_defs[size], 0, sizeof(tmp_defs[size]));
		return tmp_defs;
	}

> +
> +fail_type_realloc:
> +	for (int i = 0; i < size; i++) {

Just FYI, my personal preference is that I always add the "int i" at
the top of the function. But it has recently been declared in the Linux
kernel community that, declaring iterator variables in loops is now
deemed OK to do (as you did above). So I need to start getting use to
this. In other words, keep the above ;-)

> +		if (defs[i].name)
> +			free(defs[i].name);
> +	}
> +	free(defs);
> +	return NULL;
> +}
> +
> +/**
> + * Clone traceeval_type array @defs to the heap. Must be terminated with
> + * an instance of type TRACEEVAL_TYPE_NONE.
> + * Returns NULL if @defs is NULL, or a name is not null terminated.
> + */
> +static struct traceeval_type *type_alloc(const struct traceeval_type *defs)

Let's change this to:

ssize_t type_alloc(const struct traceeval_type *defs, struct traceeval_type **copy)

and return the number of elements and set copy to the copied types.

> +{
> +	struct traceeval_type *new_defs = NULL;
> +	char *name;
> +	size_t size = 0;

Note, use upside-down x-mas tree format for the above too:

	struct traceeval_type *new_defs = NULL;
	size_t size = 0;
	char *name;

I'm thinking we should just scan the array first to get the size. Then
we don't even need the above realloc. We just allocate once after we
know the size.

Again, we should just ignore the NONE type. No need to copy it.

> +
> +	// Empty def is represented with single TRACEEVAL_TYPE_NONE
> +	if (defs == NULL) {
> +		if (!(new_defs = calloc(1, sizeof(struct traceeval_type))))
> +			goto fail_type_alloc;
> +		new_defs->type = TRACEEVAL_TYPE_NONE;
> +		return new_defs;
> +	}

That is, we can do:

	for (size = 0; defs && defs[size].type != TRACEEVAL_TYE_NONE; size++)
		;

	if (size)
		new_defs = calloc(size, sizeof(*new_defs));

Then do the below loop with;

	for (int i; i < size; i++) {


> +
> +	for_each_key(size, defs) {

I just realized if we keep track of the number of keys/vals, we don't
need the for_each_key() macro.

> +		// Resize heap defs and clone
> +		new_defs = type_realloc(new_defs, size);
> +		if (!new_defs)
> +			goto fail_type_alloc;

So we could get rid of the above realloc.

> +
> +		// copy current def data to new_def
> +		new_defs[size] = defs[size];
> +		new_defs[size].name = NULL;
> +		// copy name to heap if it's not NULL or type NONE
> +		if (defs[size].type != TRACEEVAL_TYPE_NONE) {
> +			name = NULL;
> +			if (!defs[size].name)
> +				goto fail_type_alloc_name;
> +
> +			name = strdup(defs[size].name);
> +			if (!name)
> +				goto fail_type_alloc_name;
> +			new_defs[size].name = name;
> +		}
> +	}
> +
> +	// append array terminator
> +	new_defs = type_realloc(new_defs, size);
> +	if (!new_defs)
> +		goto fail_type_alloc;
> +	new_defs[size].type = TRACEEVAL_TYPE_NONE;

And we can get rid of the above too.

> +
> +	return new_defs;

Add a space between returns and failure labels.

> +fail_type_alloc:
> +	if (defs[size].name)
> +		print_err("failed to allocate memory for traceeval_type %s", defs[size].name);
> +	print_err("failed to allocate memory for traceeval_type index %zu", size);
> +	return NULL;
> +
> +fail_type_alloc_name:
> +	if (defs[size].name)
> +		print_err("failed to allocate name for traceeval_type %s", defs[size].name);
> +
> +	print_err("failed to allocate name for traceeval_type index %zu", size);
> +	return NULL;
> +}
> +
> +/**
> + * Create a new histogram over the given keys and values.
> + */
> +struct traceeval *traceeval_init(const struct traceeval_type *keys,
> +				 const struct traceeval_type *vals)
> +{
> +	struct traceeval *teval;
> +	char *err_msg;
> +	struct traceeval_type type = {
> +		.type = TRACEEVAL_TYPE_NONE
> +	};

Note, the above ordering is fine, as the upside-down x-mas tree format
does not apply to arrays that are defined.

Also, let's change the above to:

	struct traceeval_type none_type[] = {
		{ .type = TRACEEVAL_TYPE_NONE }
	};


Both the name change to "none_type" to be more descriptive of what it
is, and also make it an array so we don't need to pass the address
below. But it's really moot if we decide to not add NONE types.

> +
> +	if (!keys)
> +		return NULL;
> +
> +	if (keys->type == TRACEEVAL_TYPE_NONE) {
> +		err_msg = "keys cannot start with type TRACEEVAL_TYPE_NONE";
> +		goto fail_eval_init_unalloced;
> +	}
> +
> +	teval = calloc(1, sizeof(struct traceeval));

My personal preference is to use the variable for the size. I just find
it to be more robust and less error prone, especially if you
accidentally pick the wrong type.

	teval = calloc(1, sizeof(*teval));

> +	if (!teval) {
> +		err_msg = "failed to allocate memory for traceeval instance";
> +		goto fail_eval_init_unalloced;

BTW, you don't need to make the labels so verbose.

		goto fail_nofree;

would be good enough.

> +	}
> +
> +	teval->def_keys = type_alloc(keys);
> +	if (!teval->def_keys) {
> +		err_msg = "failed to allocate user defined keys";
> +		goto fail_eval_init;

and this just:

		goto fail;

> +	}
> +
> +	// if vals is NULL, alloc single type NONE

BTW, since I follow Linux kernel style, // comments are usually looked
down upon. It's better to use

	/* if vals is NULL then just alloc a single NONE type */

> +	if (vals)
> +		teval->def_vals = type_alloc(vals);
> +	else
> +		teval->def_vals = type_alloc(&type);

		teval->def_vals = type_alloc(none_type);

But here we would really just do:

		teval->nr_val_types = 0;

> +
> +	if (!teval->def_vals) {
> +		err_msg = "failed to allocate user defined values";
> +		goto fail_eval_init;
> +	}
> +
> +	teval->hist = calloc(1, sizeof(struct hist_table));
> +	if (!teval->hist) {
> +		err_msg = "failed to allocate memory for histogram";
> +		goto fail_eval_init;
> +	}
> +	teval->hist->nr_entries = 0;

The calloc() above makes the clearing of nr_entries redundant.

> +
> +	return teval;
> +fail_eval_init:
> +	traceeval_release(teval);
> +	/* fall-through */

This isn't a switch statement, no need for the above comment.

> +
> +fail_eval_init_unalloced:
> +	print_err(err_msg);
> +	return NULL;
> +}

Let's nuke all the below functions for this patch and just add the
functions in the later patches.

-- Steve


> +
> +// TODO
> +void traceeval_release(struct traceeval *teval)
> +{
> +
> +}
> +
> +// TODO
> +int traceeval_insert(struct traceeval *teval,
> +		     const union traceeval_data *keys,
> +		     const union traceeval_data *vals)
> +{
> +	return -1;
> +}
> +
> +// TODO
> +int traceeval_query(struct traceeval *teval,
> +		    const union traceeval_data *keys,
> +		    union traceeval_data * const *results)
> +{
> +	return 0;
> +}
> +
> +// TODO
> +int traceeval_find_key(struct traceeval *teval, const char *field)
> +{
> +	return -1;
> +}
> +
> +// TODO
> +int traceeval_find_val(struct traceeval *teval, const char *field)
> +{
> +	return -1;
> +}
> +
> +// TODO
> +void traceeval_results_release(struct traceeval *teval,
> +			       const union traceeval_data *results)
> +{
> +
> +}
> +
> +// TODO
> +struct traceeval_iterator *traceeval_iterator_get(struct traceeval *teval)
> +{
> +	return NULL;
> +}
> +
> +// TODO
> +int traceeval_iterator_sort(struct traceeval_iterator *iter,
> +			    const char *sort_field, int level, bool ascending)
> +{
> +	return -1;
> +}
> +
> +// TODO
> +int traceeval_iterator_next(struct traceeval_iterator *iter,
> +			    const union traceeval_data **keys)
> +{
> +	return 0;
> +}
> +
> +// TODO
> +void traceeval_keys_release(struct traceeval_iterator *iter,
> +			    const union traceeval_data *keys)
> +{
> +
> +}
> +
> +// TODO
> +int traceeval_stat(struct traceeval *teval, const union traceeval_data *keys,
> +		   const char *field, struct traceeval_stat *stat)
> +{
> +	return -1;
> +}
Ross Zwisler Aug. 2, 2023, 9:07 p.m. UTC | #2
On Fri, Jul 28, 2023 at 03:04:37PM -0400, Stevie Alvarez wrote:
> From: "Stevie Alvarez (Google)" <stevie.6strings@gmail.com>
> 
> traceeval_init() creates a new struct traceeval instance with regards
> to the struct traceeval_type array arguments keys and vals. These arrays
> define the structure of the histogram, with each describing the expected
> structure of inserted arrays of union traceeval_data. The keys and vals
> arguments are copied on the heap to ensure that the struct traceeval
> instance has access to the definition regardless of how the user
> initialized keys and vals.
> 
> Signed-off-by: Stevie Alvarez (Google) <stevie.6strings@gmail.com>
> ---
>  Makefile         |   2 +-
>  src/Makefile     |   1 +
>  src/histograms.c | 285 +++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 287 insertions(+), 1 deletion(-)
>  create mode 100644 src/histograms.c
> 
> diff --git a/Makefile b/Makefile
> index 4a24d5a..3ea051c 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -172,7 +172,7 @@ libs: $(LIBRARY_A) $(LIBRARY_SO)
>  
>  VALGRIND = $(shell which valgrind)
>  UTEST_DIR = utest
> -UTEST_BINARY = trace-utest
> +UTEST_BINARY = eval-utest
>  
>  test: force $(LIBRARY_STATIC)
>  ifneq ($(CUNIT_INSTALLED),1)
> diff --git a/src/Makefile b/src/Makefile
> index b4b6e52..b32a389 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -4,6 +4,7 @@ include $(src)/scripts/utils.mk
>  
>  OBJS =
>  OBJS += trace-analysis.o
> +OBJS += histograms.o
>  
>  OBJS := $(OBJS:%.o=$(bdir)/%.o)
>  
> diff --git a/src/histograms.c b/src/histograms.c
> new file mode 100644
> index 0000000..13830e4
> --- /dev/null
> +++ b/src/histograms.c
> @@ -0,0 +1,285 @@
> +
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * libtraceeval histogram interface implementation.
> + *
> + * Copyright (C) 2023 Google Inc, Stevie Alvarez <stevie.6strings@gmail.com>
> + */
> +
> +#include <histograms.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <stdarg.h>
> +
> +/**
> + * Iterate over @keys, which should be an array of struct traceeval_type's,
> + * until reaching an instance of type TRACEEVAL_TYPE_NONE.
> + * @i should be a declared integer type.
> + */
> +#define for_each_key(i, keys)	\
> +	for (i = 0; (keys)[(i)].type != TRACEEVAL_TYPE_NONE; (i)++)
> +
> +/** A key-value pair */
> +struct entry {
> +	union traceeval_data	*keys;
> +	union traceeval_data	*vals;
> +};
> +
> +/** A table of key-value entries */
> +struct hist_table {
> +	struct entry	*map;
> +	size_t		nr_entries;
> +};
> +
> +/** Histogram */
> +struct traceeval {
> +	struct traceeval_type		*def_keys;
> +	struct traceeval_type		*def_vals;
> +	struct hist_table		*hist;
> +};

Should this be called something else?  'struct traceeval' is already defined
in src/trace-analysis.c in this same codebase, and the other def is used all
over the place in include/traceeval.h.

> +
> +/** Iterate over results of histogram */
> +struct traceeval_iterator {};  // TODO
> +
> +/**
> + * Print error message.
> + * Additional arguments are used with respect to fmt.
> + */
> +static void print_err(const char *fmt, ...)
> +{
> +	va_list ap;
> +
> +	va_start(ap, fmt);
> +	vfprintf(stderr, fmt, ap);
> +	va_end(ap);
> +	fprintf(stderr, "\n");
> +}
> +
> +// TODO
> +int traceeval_compare(struct traceeval *orig, struct traceeval *copy)
> +{
> +	return -1;
> +}
> +
> +/**
> + * Resize a struct traceeval_type array to a size of @size + 1.
> + *
> + * Returns a pointer to the resized array, or NULL if the provided pointer was
> + * freed to due lack of memory.
> + */
> +static struct traceeval_type *type_realloc(struct traceeval_type *defs,

Probably should have been feedback for patch 1, but 'traceeval_type' also has
a collision.  'struct traceeval_type' is defined in include/histograms.h, and
'enum traceeval_type' is defined in include/libtraceeval.h.

IMO even if the compiler lets us do this, we shouldn't because it'll confuse
cscope/LSMs/grep and people reading the code.

> +		size_t size)
> +{
> +	struct traceeval_type *tmp_defs = NULL;
> +	tmp_defs = realloc(defs,
> +			(size + 1) * sizeof(struct traceeval_type));
> +	if (!tmp_defs)
> +		goto fail_type_realloc;
> +	return tmp_defs;
> +
> +fail_type_realloc:
> +	for (int i = 0; i < size; i++) {
> +		if (defs[i].name)
> +			free(defs[i].name);

Because the extra entries returned by realloc() are uninitialized, I think you
risk trying to free 'name' values that are non-NULL but not real pointers.

Should you instead try to iterate from 0 until you find an entry with type
TRACEEVAL_TYPE_NONE?  Or do you guarantee elsewhere that all 'name' pointers
are either valid or NULL?  If so, it might be good to do that initialization
in this function so the free path makes sense in the same context and so new
users of this function don't violate that assumption.

> +	}
> +	free(defs);
> +	return NULL;
> +}
> +
> +/**
> + * Clone traceeval_type array @defs to the heap. Must be terminated with
> + * an instance of type TRACEEVAL_TYPE_NONE.
> + * Returns NULL if @defs is NULL, or a name is not null terminated.

It actually returns an single entry TRACEEVAL_TYPE_NONE list if @defs is NULL

> + */
> +static struct traceeval_type *type_alloc(const struct traceeval_type *defs)
> +{
> +	struct traceeval_type *new_defs = NULL;
> +	char *name;
> +	size_t size = 0;
> +
> +	// Empty def is represented with single TRACEEVAL_TYPE_NONE
> +	if (defs == NULL) {
> +		if (!(new_defs = calloc(1, sizeof(struct traceeval_type))))
> +			goto fail_type_alloc;
> +		new_defs->type = TRACEEVAL_TYPE_NONE;
> +		return new_defs;
> +	}
> +
> +	for_each_key(size, defs) {
> +		// Resize heap defs and clone
> +		new_defs = type_realloc(new_defs, size);
> +		if (!new_defs)
> +			goto fail_type_alloc;
> +
> +		// copy current def data to new_def
> +		new_defs[size] = defs[size];
> +		new_defs[size].name = NULL;
> +		// copy name to heap if it's not NULL or type NONE
> +		if (defs[size].type != TRACEEVAL_TYPE_NONE) {
> +			name = NULL;
> +			if (!defs[size].name)
> +				goto fail_type_alloc_name;
> +
> +			name = strdup(defs[size].name);
> +			if (!name)
> +				goto fail_type_alloc_name;
> +			new_defs[size].name = name;
> +		}
> +	}
> +
> +	// append array terminator
> +	new_defs = type_realloc(new_defs, size);
> +	if (!new_defs)
> +		goto fail_type_alloc;
> +	new_defs[size].type = TRACEEVAL_TYPE_NONE;
> +
> +	return new_defs;
> +fail_type_alloc:
> +	if (defs[size].name)
> +		print_err("failed to allocate memory for traceeval_type %s", defs[size].name);
> +	print_err("failed to allocate memory for traceeval_type index %zu", size);
> +	return NULL;
> +
> +fail_type_alloc_name:
> +	if (defs[size].name)
> +		print_err("failed to allocate name for traceeval_type %s", defs[size].name);
> +
> +	print_err("failed to allocate name for traceeval_type index %zu", size);
> +	return NULL;

These two error paths do the same actions (no extra frees or anything) and
just differ in text, so they can probably be collapsed into one.

> +}
> +
> +/**
> + * Create a new histogram over the given keys and values.
> + */
> +struct traceeval *traceeval_init(const struct traceeval_type *keys,
> +				 const struct traceeval_type *vals)
> +{
> +	struct traceeval *teval;
> +	char *err_msg;
> +	struct traceeval_type type = {
> +		.type = TRACEEVAL_TYPE_NONE
> +	};
> +
> +	if (!keys)
> +		return NULL;
> +
> +	if (keys->type == TRACEEVAL_TYPE_NONE) {
> +		err_msg = "keys cannot start with type TRACEEVAL_TYPE_NONE";
> +		goto fail_eval_init_unalloced;
> +	}
> +
> +	teval = calloc(1, sizeof(struct traceeval));
> +	if (!teval) {
> +		err_msg = "failed to allocate memory for traceeval instance";
> +		goto fail_eval_init_unalloced;
> +	}
> +
> +	teval->def_keys = type_alloc(keys);
> +	if (!teval->def_keys) {
> +		err_msg = "failed to allocate user defined keys";
> +		goto fail_eval_init;
> +	}
> +
> +	// if vals is NULL, alloc single type NONE
> +	if (vals)
> +		teval->def_vals = type_alloc(vals);
> +	else
> +		teval->def_vals = type_alloc(&type);

Because type_alloc(NULL) returns a single TRACEEVAL_TYPE_NONE entry, you can
collapse these two cases into just 

  teval->def_vals = type_alloc(vals);

and get rid of the 'type' local variable.

> +
> +	if (!teval->def_vals) {
> +		err_msg = "failed to allocate user defined values";
> +		goto fail_eval_init;
> +	}
> +
> +	teval->hist = calloc(1, sizeof(struct hist_table));

nit: Steven mentioned this too, but I think it's better to say
"sizeof(*teval->hist)".  This prevents the types from getting out of sync, say
if for example we changed the type of teval->hist.

You can see examples of this in the kernel:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/params.c#n639
I think this applies to all the places you are doing sizeof(struct X).

> +	if (!teval->hist) {
> +		err_msg = "failed to allocate memory for histogram";
> +		goto fail_eval_init;
> +	}
> +	teval->hist->nr_entries = 0;
> +
> +	return teval;
> +fail_eval_init:
> +	traceeval_release(teval);
> +	/* fall-through */
> +
> +fail_eval_init_unalloced:
> +	print_err(err_msg);
> +	return NULL;
> +}
Steven Rostedt Aug. 2, 2023, 9:39 p.m. UTC | #3
On Wed, 2 Aug 2023 15:07:21 -0600
Ross Zwisler <zwisler@google.com> wrote:

> > diff --git a/src/histograms.c b/src/histograms.c
> > new file mode 100644
> > index 0000000..13830e4
> > --- /dev/null
> > +++ b/src/histograms.c
> > @@ -0,0 +1,285 @@
> > +
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * libtraceeval histogram interface implementation.
> > + *
> > + * Copyright (C) 2023 Google Inc, Stevie Alvarez <stevie.6strings@gmail.com>
> > + */
> > +
> > +#include <histograms.h>
> > +#include <stdio.h>
> > +#include <stdbool.h>
> > +#include <string.h>
> > +#include <stdarg.h>
> > +
> > +/**
> > + * Iterate over @keys, which should be an array of struct traceeval_type's,
> > + * until reaching an instance of type TRACEEVAL_TYPE_NONE.
> > + * @i should be a declared integer type.
> > + */
> > +#define for_each_key(i, keys)	\
> > +	for (i = 0; (keys)[(i)].type != TRACEEVAL_TYPE_NONE; (i)++)
> > +
> > +/** A key-value pair */
> > +struct entry {
> > +	union traceeval_data	*keys;
> > +	union traceeval_data	*vals;
> > +};
> > +
> > +/** A table of key-value entries */
> > +struct hist_table {
> > +	struct entry	*map;
> > +	size_t		nr_entries;
> > +};
> > +
> > +/** Histogram */
> > +struct traceeval {
> > +	struct traceeval_type		*def_keys;
> > +	struct traceeval_type		*def_vals;
> > +	struct hist_table		*hist;
> > +};  
> 
> Should this be called something else?  'struct traceeval' is already defined
> in src/trace-analysis.c in this same codebase, and the other def is used all
> over the place in include/traceeval.h.

Yeah, we could call it something slightly different for now, but the goal
is that it will supersede the struct traceeval in trace-analysis.c, and the
code there will be removed.

We haven't hit version 1.0 yet because I hated the old API and until 1.0 is
reached, the API is allowed to change.

> 
> > +
> > +/** Iterate over results of histogram */
> > +struct traceeval_iterator {};  // TODO
> > +
> > +/**
> > + * Print error message.
> > + * Additional arguments are used with respect to fmt.
> > + */
> > +static void print_err(const char *fmt, ...)
> > +{
> > +	va_list ap;
> > +
> > +	va_start(ap, fmt);
> > +	vfprintf(stderr, fmt, ap);
> > +	va_end(ap);
> > +	fprintf(stderr, "\n");
> > +}
> > +
> > +// TODO
> > +int traceeval_compare(struct traceeval *orig, struct traceeval *copy)
> > +{
> > +	return -1;
> > +}
> > +
> > +/**
> > + * Resize a struct traceeval_type array to a size of @size + 1.
> > + *
> > + * Returns a pointer to the resized array, or NULL if the provided pointer was
> > + * freed to due lack of memory.
> > + */
> > +static struct traceeval_type *type_realloc(struct traceeval_type *defs,  
> 
> Probably should have been feedback for patch 1, but 'traceeval_type' also has
> a collision.  'struct traceeval_type' is defined in include/histograms.h, and
> 'enum traceeval_type' is defined in include/libtraceeval.h.
> 
> IMO even if the compiler lets us do this, we shouldn't because it'll confuse
> cscope/LSMs/grep and people reading the code.

Again, I think it may be fine, as we will eventually remove the old code.


> 
> > +		size_t size)
> > +{
> > +	struct traceeval_type *tmp_defs = NULL;
> > +	tmp_defs = realloc(defs,
> > +			(size + 1) * sizeof(struct traceeval_type));
> > +	if (!tmp_defs)
> > +		goto fail_type_realloc;
> > +	return tmp_defs;
> > +
> > +fail_type_realloc:
> > +	for (int i = 0; i < size; i++) {
> > +		if (defs[i].name)
> > +			free(defs[i].name);  
> 
> Because the extra entries returned by realloc() are uninitialized, I think you
> risk trying to free 'name' values that are non-NULL but not real pointers.
> 
> Should you instead try to iterate from 0 until you find an entry with type
> TRACEEVAL_TYPE_NONE?  Or do you guarantee elsewhere that all 'name' pointers
> are either valid or NULL?  If so, it might be good to do that initialization
> in this function so the free path makes sense in the same context and so new
> users of this function don't violate that assumption.

I believe I mentioned the same thing (or similar) in my reply.

-- Steve

> 
> > +	}
> > +	free(defs);
> > +	return NULL;
> > +}
> > +
> > +/**
> > + * Clone traceeval_type array @defs to the heap. Must be terminated with
> > + * an instance of type TRACEEVAL_TYPE_NONE.
> > + * Returns NULL if @defs is NULL, or a name is not null terminated.  
>
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 4a24d5a..3ea051c 100644
--- a/Makefile
+++ b/Makefile
@@ -172,7 +172,7 @@  libs: $(LIBRARY_A) $(LIBRARY_SO)
 
 VALGRIND = $(shell which valgrind)
 UTEST_DIR = utest
-UTEST_BINARY = trace-utest
+UTEST_BINARY = eval-utest
 
 test: force $(LIBRARY_STATIC)
 ifneq ($(CUNIT_INSTALLED),1)
diff --git a/src/Makefile b/src/Makefile
index b4b6e52..b32a389 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,6 +4,7 @@  include $(src)/scripts/utils.mk
 
 OBJS =
 OBJS += trace-analysis.o
+OBJS += histograms.o
 
 OBJS := $(OBJS:%.o=$(bdir)/%.o)
 
diff --git a/src/histograms.c b/src/histograms.c
new file mode 100644
index 0000000..13830e4
--- /dev/null
+++ b/src/histograms.c
@@ -0,0 +1,285 @@ 
+
+/* SPDX-License-Identifier: MIT */
+/*
+ * libtraceeval histogram interface implementation.
+ *
+ * Copyright (C) 2023 Google Inc, Stevie Alvarez <stevie.6strings@gmail.com>
+ */
+
+#include <histograms.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdarg.h>
+
+/**
+ * Iterate over @keys, which should be an array of struct traceeval_type's,
+ * until reaching an instance of type TRACEEVAL_TYPE_NONE.
+ * @i should be a declared integer type.
+ */
+#define for_each_key(i, keys)	\
+	for (i = 0; (keys)[(i)].type != TRACEEVAL_TYPE_NONE; (i)++)
+
+/** A key-value pair */
+struct entry {
+	union traceeval_data	*keys;
+	union traceeval_data	*vals;
+};
+
+/** A table of key-value entries */
+struct hist_table {
+	struct entry	*map;
+	size_t		nr_entries;
+};
+
+/** Histogram */
+struct traceeval {
+	struct traceeval_type		*def_keys;
+	struct traceeval_type		*def_vals;
+	struct hist_table		*hist;
+};
+
+/** Iterate over results of histogram */
+struct traceeval_iterator {};  // TODO
+
+/**
+ * Print error message.
+ * Additional arguments are used with respect to fmt.
+ */
+static void print_err(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+}
+
+// TODO
+int traceeval_compare(struct traceeval *orig, struct traceeval *copy)
+{
+	return -1;
+}
+
+/**
+ * Resize a struct traceeval_type array to a size of @size + 1.
+ *
+ * Returns a pointer to the resized array, or NULL if the provided pointer was
+ * freed to due lack of memory.
+ */
+static struct traceeval_type *type_realloc(struct traceeval_type *defs,
+		size_t size)
+{
+	struct traceeval_type *tmp_defs = NULL;
+	tmp_defs = realloc(defs,
+			(size + 1) * sizeof(struct traceeval_type));
+	if (!tmp_defs)
+		goto fail_type_realloc;
+	return tmp_defs;
+
+fail_type_realloc:
+	for (int i = 0; i < size; i++) {
+		if (defs[i].name)
+			free(defs[i].name);
+	}
+	free(defs);
+	return NULL;
+}
+
+/**
+ * Clone traceeval_type array @defs to the heap. Must be terminated with
+ * an instance of type TRACEEVAL_TYPE_NONE.
+ * Returns NULL if @defs is NULL, or a name is not null terminated.
+ */
+static struct traceeval_type *type_alloc(const struct traceeval_type *defs)
+{
+	struct traceeval_type *new_defs = NULL;
+	char *name;
+	size_t size = 0;
+
+	// Empty def is represented with single TRACEEVAL_TYPE_NONE
+	if (defs == NULL) {
+		if (!(new_defs = calloc(1, sizeof(struct traceeval_type))))
+			goto fail_type_alloc;
+		new_defs->type = TRACEEVAL_TYPE_NONE;
+		return new_defs;
+	}
+
+	for_each_key(size, defs) {
+		// Resize heap defs and clone
+		new_defs = type_realloc(new_defs, size);
+		if (!new_defs)
+			goto fail_type_alloc;
+
+		// copy current def data to new_def
+		new_defs[size] = defs[size];
+		new_defs[size].name = NULL;
+		// copy name to heap if it's not NULL or type NONE
+		if (defs[size].type != TRACEEVAL_TYPE_NONE) {
+			name = NULL;
+			if (!defs[size].name)
+				goto fail_type_alloc_name;
+
+			name = strdup(defs[size].name);
+			if (!name)
+				goto fail_type_alloc_name;
+			new_defs[size].name = name;
+		}
+	}
+
+	// append array terminator
+	new_defs = type_realloc(new_defs, size);
+	if (!new_defs)
+		goto fail_type_alloc;
+	new_defs[size].type = TRACEEVAL_TYPE_NONE;
+
+	return new_defs;
+fail_type_alloc:
+	if (defs[size].name)
+		print_err("failed to allocate memory for traceeval_type %s", defs[size].name);
+	print_err("failed to allocate memory for traceeval_type index %zu", size);
+	return NULL;
+
+fail_type_alloc_name:
+	if (defs[size].name)
+		print_err("failed to allocate name for traceeval_type %s", defs[size].name);
+
+	print_err("failed to allocate name for traceeval_type index %zu", size);
+	return NULL;
+}
+
+/**
+ * Create a new histogram over the given keys and values.
+ */
+struct traceeval *traceeval_init(const struct traceeval_type *keys,
+				 const struct traceeval_type *vals)
+{
+	struct traceeval *teval;
+	char *err_msg;
+	struct traceeval_type type = {
+		.type = TRACEEVAL_TYPE_NONE
+	};
+
+	if (!keys)
+		return NULL;
+
+	if (keys->type == TRACEEVAL_TYPE_NONE) {
+		err_msg = "keys cannot start with type TRACEEVAL_TYPE_NONE";
+		goto fail_eval_init_unalloced;
+	}
+
+	teval = calloc(1, sizeof(struct traceeval));
+	if (!teval) {
+		err_msg = "failed to allocate memory for traceeval instance";
+		goto fail_eval_init_unalloced;
+	}
+
+	teval->def_keys = type_alloc(keys);
+	if (!teval->def_keys) {
+		err_msg = "failed to allocate user defined keys";
+		goto fail_eval_init;
+	}
+
+	// if vals is NULL, alloc single type NONE
+	if (vals)
+		teval->def_vals = type_alloc(vals);
+	else
+		teval->def_vals = type_alloc(&type);
+
+	if (!teval->def_vals) {
+		err_msg = "failed to allocate user defined values";
+		goto fail_eval_init;
+	}
+
+	teval->hist = calloc(1, sizeof(struct hist_table));
+	if (!teval->hist) {
+		err_msg = "failed to allocate memory for histogram";
+		goto fail_eval_init;
+	}
+	teval->hist->nr_entries = 0;
+
+	return teval;
+fail_eval_init:
+	traceeval_release(teval);
+	/* fall-through */
+
+fail_eval_init_unalloced:
+	print_err(err_msg);
+	return NULL;
+}
+
+// TODO
+void traceeval_release(struct traceeval *teval)
+{
+
+}
+
+// TODO
+int traceeval_insert(struct traceeval *teval,
+		     const union traceeval_data *keys,
+		     const union traceeval_data *vals)
+{
+	return -1;
+}
+
+// TODO
+int traceeval_query(struct traceeval *teval,
+		    const union traceeval_data *keys,
+		    union traceeval_data * const *results)
+{
+	return 0;
+}
+
+// TODO
+int traceeval_find_key(struct traceeval *teval, const char *field)
+{
+	return -1;
+}
+
+// TODO
+int traceeval_find_val(struct traceeval *teval, const char *field)
+{
+	return -1;
+}
+
+// TODO
+void traceeval_results_release(struct traceeval *teval,
+			       const union traceeval_data *results)
+{
+
+}
+
+// TODO
+struct traceeval_iterator *traceeval_iterator_get(struct traceeval *teval)
+{
+	return NULL;
+}
+
+// TODO
+int traceeval_iterator_sort(struct traceeval_iterator *iter,
+			    const char *sort_field, int level, bool ascending)
+{
+	return -1;
+}
+
+// TODO
+int traceeval_iterator_next(struct traceeval_iterator *iter,
+			    const union traceeval_data **keys)
+{
+	return 0;
+}
+
+// TODO
+void traceeval_keys_release(struct traceeval_iterator *iter,
+			    const union traceeval_data *keys)
+{
+
+}
+
+// TODO
+int traceeval_stat(struct traceeval *teval, const union traceeval_data *keys,
+		   const char *field, struct traceeval_stat *stat)
+{
+	return -1;
+}