diff mbox series

[3/4] Sample list-object-filter extensions

Message ID 8e0d11ea53a080e8212768f370fb8f05eaded312.1630885899.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Compile-time extensions for list-object-filter | expand

Commit Message

Andrew Olsen Sept. 5, 2021, 11:51 p.m. UTC
From: Andrew Olsen <andrew.olsen@koordinates.com>

Basic filter extension example which filters to a random subset of
blobs, and another example which shows how to do the same in C++ and
how to link in another library required by a filter extension.
Documentation changes follow.

Signed-off-by: Andrew Olsen <andrew.olsen@koordinates.com>
---
 contrib/filter-extensions/rand/.gitignore     |   2 +
 contrib/filter-extensions/rand/Makefile       |  28 +++++
 contrib/filter-extensions/rand/rand.c         | 103 ++++++++++++++++++
 contrib/filter-extensions/rand_cpp/.gitignore |   2 +
 contrib/filter-extensions/rand_cpp/Makefile   |  34 ++++++
 .../rand_cpp/adapter_functions.c              |   6 +
 .../rand_cpp/adapter_functions.h              |  10 ++
 contrib/filter-extensions/rand_cpp/rand.cpp   | 103 ++++++++++++++++++
 8 files changed, 288 insertions(+)
 create mode 100644 contrib/filter-extensions/rand/.gitignore
 create mode 100644 contrib/filter-extensions/rand/Makefile
 create mode 100644 contrib/filter-extensions/rand/rand.c
 create mode 100644 contrib/filter-extensions/rand_cpp/.gitignore
 create mode 100644 contrib/filter-extensions/rand_cpp/Makefile
 create mode 100644 contrib/filter-extensions/rand_cpp/adapter_functions.c
 create mode 100644 contrib/filter-extensions/rand_cpp/adapter_functions.h
 create mode 100644 contrib/filter-extensions/rand_cpp/rand.cpp
diff mbox series

Patch

diff --git a/contrib/filter-extensions/rand/.gitignore b/contrib/filter-extensions/rand/.gitignore
new file mode 100644
index 00000000000..9eca6c88cf2
--- /dev/null
+++ b/contrib/filter-extensions/rand/.gitignore
@@ -0,0 +1,2 @@ 
+*.a
+*.o
diff --git a/contrib/filter-extensions/rand/Makefile b/contrib/filter-extensions/rand/Makefile
new file mode 100644
index 00000000000..267221ee952
--- /dev/null
+++ b/contrib/filter-extensions/rand/Makefile
@@ -0,0 +1,28 @@ 
+# Run this via `FILTER_EXTENSIONS=contrib/filter-extensions/rand/rand.a make`
+# from the main git directory. That way we inherit useful variables.
+
+ifneq ($(findstring s,$(MAKEFLAGS)),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_AR       = @echo '   ' AR $@;
+endif
+endif
+
+FILTER_STATIC_LIB = rand.a
+
+all: $(FILTER_STATIC_LIB)
+ifeq ($(MAKELEVEL),0)
+	$(error "Run via parent git make")
+endif
+	@:
+
+$(FILTER_STATIC_LIB): rand.o
+	$(QUIET_AR)$(AR) $(ARFLAGS) $@ $^
+
+rand.o: rand.c
+	$(QUIET_CC)$(CC) -c $(ALL_CFLAGS) $<
+
+clean:
+	$(RM) $(FILTER_STATIC_LIB) rand.o
+
+.PHONY: all clean
diff --git a/contrib/filter-extensions/rand/rand.c b/contrib/filter-extensions/rand/rand.c
new file mode 100644
index 00000000000..af153709345
--- /dev/null
+++ b/contrib/filter-extensions/rand/rand.c
@@ -0,0 +1,103 @@ 
+#include "../../../git-compat-util.h"
+#include "../../../list-objects-filter-extensions.h"
+#include "../../../object.h"
+#include "../../../hash.h"
+#include "../../../trace.h"
+
+
+static struct trace_key trace_filter = TRACE_KEY_INIT(FILTER);
+
+struct rand_context {
+	int percentageMatch;	/* 0-100: share of blobs to keep, set in rand_init() */
+	int matchCount;		/* blobs kept so far */
+	int blobCount;		/* blobs visited so far */
+	int treeCount;		/* trees visited so far */
+	uint64_t started_at;	/* getnanotime() reading taken in rand_init() */
+};
+
+/* Parse the match percentage and hand fresh filter state back via *context. */
+static int rand_init(const struct repository *r, const char *filter_arg,
+		     void **context)
+{
+	struct rand_context *ctx = calloc(1, sizeof(*ctx));
+
+	if (!ctx) /* calloc() can fail; never dereference NULL below */
+		die("filter-rand: out of memory");
+	ctx->percentageMatch = atoi(filter_arg);
+	if (ctx->percentageMatch > 100 || ctx->percentageMatch < 0) {
+		fprintf(stderr, "filter-rand: warning: invalid match %%: %s\n",
+			filter_arg);
+		ctx->percentageMatch = 1; /* out of range: fall back to 1% */
+	}
+	fprintf(stderr, "filter-rand: matching %d%%\n", ctx->percentageMatch);
+	ctx->started_at = getnanotime(); /* rand_free() reports time since here */
+	(*context) = ctx; /* released later by rand_free() */
+	return 0;
+}
+
+/*
+ * Filter callback: decides, per filter_situation, whether obj is shown.
+ * Trees are always shown. A blob is shown when rand() % 100 is below
+ * the configured percentage; otherwise it is marked for omission.
+ * A progress line is written to stderr whenever the number of objects
+ * counted so far (plus this call) is a multiple of 100000.
+ */
+static enum list_objects_filter_result rand_filter_object(
+	const struct repository *r,
+	const enum list_objects_filter_situation filter_situation,
+	struct object *obj, const char *pathname, const char *filename,
+	enum list_objects_filter_omit *omit, void *context)
+{
+	struct rand_context *state = context;
+	int visit_no = state->blobCount + state->treeCount + 1;
+
+	if (visit_no % 100000 == 0)
+		fprintf(stderr, "filter-rand: %d...\n", visit_no);
+
+	switch (filter_situation) {
+	case LOFS_BEGIN_TREE:
+		/* trees are never filtered out */
+		state->treeCount++;
+		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
+	case LOFS_END_TREE:
+		return LOFR_ZERO;
+
+	case LOFS_BLOB:
+		state->blobCount++;
+
+		if ((rand() % 100) >= state->percentageMatch) {
+			/* hard omit: marked seen, but not shown */
+			*omit = LOFO_OMIT;
+			return LOFR_MARK_SEEN;
+		}
+		state->matchCount++;
+		trace_printf_key(&trace_filter, "match: %s %s\n",
+				 oid_to_hex(&obj->oid), pathname);
+		return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
+	default:
+		die("filter-rand: unknown filter_situation: %d", filter_situation);
+	}
+}
+
+/* Print statistics (rates are 0 for an empty walk), then free the state. */
+static void rand_free(const struct repository *r, void *context)
+{
+	struct rand_context *ctx = context;
+	double elapsed = (getnanotime() - ctx->started_at)/1E9;
+	int count = ctx->blobCount + ctx->treeCount;
+
+	fprintf(stderr, "filter-rand: done: count=%d (blob=%d tree=%d) "
+		"matched=%d elapsed=%fs rate=%0.1f/s average=%0.1fus\n",
+		count, ctx->blobCount, ctx->treeCount, ctx->matchCount, elapsed,
+		elapsed > 0 ? count/elapsed : 0.0, count ? elapsed/count*1E6 : 0.0);
+	free(ctx);
+}
+
+const struct filter_extension filter_extension_rand = { /* NOTE(review): field meanings assumed; struct filter_extension is declared elsewhere */
+	"rand",			/* name string of this extension */
+	&rand_init,		/* allocates the rand_context and parses the percentage */
+	&rand_filter_object,	/* decides show vs. omit for each tree/blob */
+	&rand_free,		/* prints statistics and frees the rand_context */
+};
diff --git a/contrib/filter-extensions/rand_cpp/.gitignore b/contrib/filter-extensions/rand_cpp/.gitignore
new file mode 100644
index 00000000000..9eca6c88cf2
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/.gitignore
@@ -0,0 +1,2 @@ 
+*.a
+*.o
diff --git a/contrib/filter-extensions/rand_cpp/Makefile b/contrib/filter-extensions/rand_cpp/Makefile
new file mode 100644
index 00000000000..278121e3d5a
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/Makefile
@@ -0,0 +1,34 @@ 
+# Run this via `FILTER_EXTENSIONS=contrib/filter-extensions/rand_cpp/rand_cpp.a make`
+# from the main git directory. That way we inherit useful variables.
+
+ifneq ($(findstring s,$(MAKEFLAGS)),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_CXX       = @echo '   ' CXX $@;
+	QUIET_AR       = @echo '   ' AR $@;
+endif
+endif
+
+FILTER_STATIC_LIB = rand_cpp.a
+
+ALL_CXXFLAGS += -std=c++11
+
+all: $(FILTER_STATIC_LIB)
+ifeq ($(MAKELEVEL),0)
+	$(error "Run via parent git make")
+endif
+	@:
+
+$(FILTER_STATIC_LIB): rand.o adapter_functions.o
+	$(QUIET_AR)$(AR) $(ARFLAGS) $@ $^
+
+rand.o: rand.cpp
+	$(QUIET_CXX)$(CXX) -c $(ALL_CFLAGS) $(ALL_CXXFLAGS) $<
+
+adapter_functions.o: adapter_functions.c
+	$(QUIET_CC)$(CC) -c $(ALL_CFLAGS) $<
+
+clean: # remove every artifact this Makefile builds, including the C shim object
+	$(RM) $(FILTER_STATIC_LIB) rand.o adapter_functions.o
+
+.PHONY: all clean
diff --git a/contrib/filter-extensions/rand_cpp/adapter_functions.c b/contrib/filter-extensions/rand_cpp/adapter_functions.c
new file mode 100644
index 00000000000..0d9d2a2aa96
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/adapter_functions.c
@@ -0,0 +1,6 @@ 
+#include "../../../git-compat-util.h"
+#include "../../../object.h"
+
+char *obj_to_hex_oid(struct object *obj) { /* C shim: gives rand.cpp an object's hex id via oid_to_hex() */
+    return oid_to_hex(&obj->oid);
+}
diff --git a/contrib/filter-extensions/rand_cpp/adapter_functions.h b/contrib/filter-extensions/rand_cpp/adapter_functions.h
new file mode 100644
index 00000000000..1150c21a258
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/adapter_functions.h
@@ -0,0 +1,10 @@ 
+#ifndef RAND_CPP_ADAPTER_FUNCTIONS_H
+#define RAND_CPP_ADAPTER_FUNCTIONS_H
+
+#include <stdint.h> /* uint64_t: keeps this header self-contained */
+
+struct object;
+uint64_t getnanotime(void); /* not defined in this directory; re-declared for rand.cpp */
+char *obj_to_hex_oid(struct object *obj); /* defined in adapter_functions.c */
+
+#endif /* RAND_CPP_ADAPTER_FUNCTIONS_H */
diff --git a/contrib/filter-extensions/rand_cpp/rand.cpp b/contrib/filter-extensions/rand_cpp/rand.cpp
new file mode 100644
index 00000000000..cb608d14ed9
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/rand.cpp
@@ -0,0 +1,103 @@ 
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include <time.h>
+
+extern "C" {
+	#include "../../../list-objects-filter-extensions.h"
+	#include "adapter_functions.h"
+}
+
+namespace {
+
+// Per-traversal state shared by the callbacks below.
+struct rand_context {
+	int percentageMatch = 0;	// 0-100: share of blobs to keep, set in rand_init()
+	int matchCount = 0;		// blobs kept so far
+	int blobCount = 0;		// blobs visited so far
+	int treeCount = 0;		// trees visited so far
+	uint64_t started_at = 0;	// getnanotime() reading taken in rand_init()
+};
+
+// Parse the match percentage from filter_arg and allocate the traversal state.
+// The state is returned through *context and deleted by rand_free(). Returns 0.
+static int rand_init(const struct repository *r, const char *filter_arg, void **context)
+{
+	struct rand_context *ctx = new rand_context();
+
+	ctx->percentageMatch = atoi(filter_arg);
+	if (ctx->percentageMatch > 100 || ctx->percentageMatch < 0) {
+		std::cerr << "filter-rand-cpp: warning: invalid match %: " << filter_arg << "\n";
+		ctx->percentageMatch = 1;  // default 1%
+	}
+	std::cerr << "filter-rand-cpp: matching " << ctx->percentageMatch << "%\n";
+	ctx->started_at = getnanotime();
+	// Hand the state to the caller; otherwise the other callbacks never see it and it leaks.
+	(*context) = ctx;
+	return 0;
+}
+
+// Filter callback: every tree is shown; a blob is shown when rand() % 100 is below
+// percentageMatch and is otherwise omitted. All diagnostics go to stderr.
+enum list_objects_filter_result rand_filter_object(
+	const struct repository *r,
+	const enum list_objects_filter_situation filter_situation,
+	struct object *obj, const char *pathname, const char *filename,
+	enum list_objects_filter_omit *omit, void *context)
+{
+	struct rand_context *ctx = static_cast<struct rand_context*>(context);
+
+	if ((ctx->blobCount + ctx->treeCount + 1) % 100000 == 0) {
+		std::cerr << "filter-rand-cpp: " << (ctx->blobCount + ctx->treeCount + 1) << "...\n";
+	}
+	switch (filter_situation) {
+	default:
+		std::cerr << "filter-rand-cpp: unknown filter_situation: " << filter_situation << "\n";
+		abort();
+
+	case LOFS_BEGIN_TREE:
+		ctx->treeCount++;
+		/* always include all tree objects */
+		return static_cast<list_objects_filter_result>(LOFR_MARK_SEEN | LOFR_DO_SHOW);
+
+	case LOFS_END_TREE:
+		return LOFR_ZERO;
+
+	case LOFS_BLOB:
+		ctx->blobCount++;
+		if ((rand() % 100) < ctx->percentageMatch) {
+			ctx->matchCount++;
+			// stderr like every other message here, keeping stdout free of filter chatter
+			std::cerr << "match: " << obj_to_hex_oid(obj) << " " << pathname << "\n";
+			return static_cast<list_objects_filter_result>(LOFR_MARK_SEEN | LOFR_DO_SHOW);
+		} else {
+			*omit = LOFO_OMIT;
+			return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
+		}
+	}
+}
+
+// Print traversal statistics to stderr (rates are 0 for an empty walk), then delete the state.
+void rand_free(const struct repository *r, void *context) {
+	struct rand_context *ctx = static_cast<struct rand_context*>(context);
+	double elapsed = (getnanotime() - ctx->started_at)/1E9;
+	int count = ctx->blobCount + ctx->treeCount;
+
+	std::cerr << "filter-rand-cpp: done: count=" << count
+		<< " (blob=" << ctx->blobCount << " tree=" << ctx->treeCount << ")"
+		<< " matched=" << ctx->matchCount
+		<< " elapsed=" << elapsed << "s"
+		<< " rate=" << (elapsed > 0 ? count/elapsed : 0.0) << "/s"
+		<< " average=" << (count ? elapsed/count*1E6 : 0.0) << "us\n";
+	delete ctx;
+}
+
+} // namespace
+
+extern const struct filter_extension filter_extension_rand_cpp = { // NOTE(review): field meanings assumed; struct filter_extension is declared elsewhere
+	"rand_cpp",		// name string of this extension
+	&rand_init,		// allocates the rand_context and parses the percentage
+	&rand_filter_object,	// decides show vs. omit for each tree/blob
+	&rand_free,		// prints statistics and deletes the rand_context
+};