diff mbox series

[v6,04/20] reftable: utility functions

Message ID df8003cb9a7d9e017d358251a2d22c0e72454e03.1618255552.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series reftable library | expand

Commit Message

Han-Wen Nienhuys April 12, 2021, 7:25 p.m. UTC
From: Han-Wen Nienhuys <hanwen@google.com>

This commit provides basic utility classes for the reftable library.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Makefile                            |  25 +++++-
 contrib/buildsystems/CMakeLists.txt |  14 ++-
 reftable/basics.c                   | 128 ++++++++++++++++++++++++++++
 reftable/basics.h                   |  60 +++++++++++++
 reftable/basics_test.c              |  98 +++++++++++++++++++++
 reftable/publicbasics.c             |  58 +++++++++++++
 reftable/reftable-malloc.h          |  18 ++++
 reftable/reftable-tests.h           |  22 +++++
 reftable/system.h                   |  32 +++++++
 reftable/test_framework.c           |  23 +++++
 reftable/test_framework.h           |  53 ++++++++++++
 t/helper/test-reftable.c            |   9 ++
 t/helper/test-tool.c                |   3 +-
 t/helper/test-tool.h                |   1 +
 t/t0032-reftable-unittest.sh        |  15 ++++
 15 files changed, 553 insertions(+), 6 deletions(-)
 create mode 100644 reftable/basics.c
 create mode 100644 reftable/basics.h
 create mode 100644 reftable/basics_test.c
 create mode 100644 reftable/publicbasics.c
 create mode 100644 reftable/reftable-malloc.h
 create mode 100644 reftable/reftable-tests.h
 create mode 100644 reftable/system.h
 create mode 100644 reftable/test_framework.c
 create mode 100644 reftable/test_framework.h
 create mode 100644 t/helper/test-reftable.c
 create mode 100755 t/t0032-reftable-unittest.sh

Comments

Ævar Arnfjörð Bjarmason April 13, 2021, 8:02 a.m. UTC | #1
On Mon, Apr 12 2021, Han-Wen Nienhuys via GitGitGadget wrote:

> +int strbuf_add_void(void *b, const void *data, size_t sz)
> +{
> +	strbuf_add((struct strbuf *)b, data, sz);
> +	return sz;
> +}

Is that cast needed on your compiler? This compiles without warnings for
me without that.

Also, maybe this is the sort of thing that makes sense to split into
general "APIs needed for reftable" patches. E.g. something like the
below (just the strbuf.h change):

diff --git a/reftable/merged_test.c b/reftable/merged_test.c
index 0c301cecced..e49029eed34 100644
--- a/reftable/merged_test.c
+++ b/reftable/merged_test.c
@@ -43,7 +43,7 @@ static void write_test_table(struct strbuf *buf,
 		}
 	}
 
-	w = reftable_new_writer(&strbuf_add_void, buf, &opts);
+	w = reftable_new_writer(&strbuf_add_write, buf, &opts);
 	reftable_writer_set_limits(w, min, max);
 
 	for (i = 0; i < n; i++) {
@@ -241,7 +241,7 @@ static void test_default_write_opts(void)
 	struct reftable_write_options opts = { 0 };
 	struct strbuf buf = STRBUF_INIT;
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 
 	struct reftable_ref_record rec = {
 		.refname = "master",
diff --git a/reftable/refname_test.c b/reftable/refname_test.c
index 5e005d6af31..e8ecba1fad9 100644
--- a/reftable/refname_test.c
+++ b/reftable/refname_test.c
@@ -31,7 +31,7 @@ static void test_conflict(void)
 	struct reftable_write_options opts = { 0 };
 	struct strbuf buf = STRBUF_INIT;
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 	struct reftable_ref_record rec = {
 		.refname = "a/b",
 		.value_type = REFTABLE_REF_SYMREF,
diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index 9d2f8d60555..bdef4813f4c 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -83,7 +83,7 @@ struct reftable_stats {
 
 /* reftable_new_writer creates a new writer */
 struct reftable_writer *
-reftable_new_writer(int (*writer_func)(void *, const void *, size_t),
+reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t),
 		    void *writer_arg, struct reftable_write_options *opts);
 
 /* Set the range of update indices for the records we will add. When writing a
diff --git a/reftable/reftable_test.c b/reftable/reftable_test.c
index 69dbfb09fff..1685b9a07bc 100644
--- a/reftable/reftable_test.c
+++ b/reftable/reftable_test.c
@@ -52,7 +52,7 @@ static void write_table(char ***names, struct strbuf *buf, int N,
 		.hash_id = hash_id,
 	};
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, buf, &opts);
+		reftable_new_writer(&strbuf_add_write, buf, &opts);
 	struct reftable_ref_record ref = { NULL };
 	int i = 0, n;
 	struct reftable_log_record log = { NULL };
@@ -131,7 +131,7 @@ static void test_log_buffer_size(void)
 						   .message = "commit: 9\n",
 					   } };
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 
 	/* This tests buffer extension for log compression. Must use a random
 	   hash, to ensure that the compressed part is larger than the original.
@@ -169,7 +169,7 @@ static void test_log_write_read(void)
 	struct reftable_block_source source = { NULL };
 	struct strbuf buf = STRBUF_INIT;
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 	const struct reftable_stats *stats = NULL;
 	reftable_writer_set_limits(w, 0, N);
 	for (i = 0; i < N; i++) {
@@ -437,7 +437,7 @@ static void test_table_refs_for(int indexed)
 
 	struct strbuf buf = STRBUF_INIT;
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 
 	struct reftable_iterator it = { NULL };
 	int j;
@@ -534,7 +534,7 @@ static void test_table_empty(void)
 	struct reftable_write_options opts = { 0 };
 	struct strbuf buf = STRBUF_INIT;
 	struct reftable_writer *w =
-		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+		reftable_new_writer(&strbuf_add_write, &buf, &opts);
 	struct reftable_block_source source = { NULL };
 	struct reftable_reader *rd = NULL;
 	struct reftable_ref_record rec = { NULL };
diff --git a/reftable/stack.c b/reftable/stack.c
index 3cdb6e8ed33..2e3fd8db1dd 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -39,7 +39,7 @@ static void stack_filename(struct strbuf *dest, struct reftable_stack *st,
 	strbuf_addstr(dest, name);
 }
 
-static int reftable_fd_write(void *arg, const void *data, size_t sz)
+static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
 {
 	int *fdp = (int *)arg;
 	return write(*fdp, data, sz);
diff --git a/reftable/test_framework.c b/reftable/test_framework.c
index a5ff4e2a2d2..e5e39fe8b13 100644
--- a/reftable/test_framework.c
+++ b/reftable/test_framework.c
@@ -15,9 +15,3 @@ void set_test_hash(uint8_t *p, int i)
 {
 	memset(p, (uint8_t)i, hash_size(SHA1_ID));
 }
-
-int strbuf_add_void(void *b, const void *data, size_t sz)
-{
-	strbuf_add((struct strbuf *)b, data, sz);
-	return sz;
-}
diff --git a/reftable/test_framework.h b/reftable/test_framework.h
index 5fdc9519a5a..c04925ea11d 100644
--- a/reftable/test_framework.h
+++ b/reftable/test_framework.h
@@ -46,8 +46,4 @@ license that can be found in the LICENSE file or at
 
 void set_test_hash(uint8_t *p, int i);
 
-/* Like strbuf_add, but suitable for passing to reftable_new_writer
- */
-int strbuf_add_void(void *b, const void *data, size_t sz);
-
 #endif
diff --git a/reftable/writer.c b/reftable/writer.c
index d42ca8afac1..ca3b127f83d 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -118,7 +118,7 @@ static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ)
 static struct strbuf reftable_empty_strbuf = STRBUF_INIT;
 
 struct reftable_writer *
-reftable_new_writer(int (*writer_func)(void *, const void *, size_t),
+reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t),
 		    void *writer_arg, struct reftable_write_options *opts)
 {
 	struct reftable_writer *wp =
diff --git a/reftable/writer.h b/reftable/writer.h
index 4921c249d06..09b88673d97 100644
--- a/reftable/writer.h
+++ b/reftable/writer.h
@@ -15,7 +15,7 @@ license that can be found in the LICENSE file or at
 #include "reftable-writer.h"
 
 struct reftable_writer {
-	int (*write)(void *, const void *, size_t);
+	ssize_t (*write)(void *, const void *, size_t);
 	void *write_arg;
 	int pending_padding;
 	struct strbuf last_key;
diff --git a/strbuf.h b/strbuf.h
index 223ee2094af..e3ae5b8a9d3 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -290,6 +290,18 @@ void strbuf_add_commented_lines(struct strbuf *out,
  */
 void strbuf_add(struct strbuf *sb, const void *data, size_t len);
 
+/**
+ * Like strbuf_add() but emulates write() for APIs that need
+ * it. Returns the passed-in `len` as-is. The `void *` is really a
+ * `struct strbuf *`.
+ */
+static inline ssize_t strbuf_add_write(void *sb, const void *data,
+				       size_t len)
+{
+	strbuf_add(sb, data, len);
+	return len;
+}
+
 /**
  * Add a NUL-terminated string to the buffer.
  *
Han-Wen Nienhuys April 13, 2021, 10:58 a.m. UTC | #2
On Tue, Apr 13, 2021 at 10:02 AM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
>
> On Mon, Apr 12 2021, Han-Wen Nienhuys via GitGitGadget wrote:
>
> > +int strbuf_add_void(void *b, const void *data, size_t sz)
> > +{
> > +     strbuf_add((struct strbuf *)b, data, sz);
> > +     return sz;
> > +}
>
> Is that cast needed on your compiler? This compiles without warnings for
> me without that.

No! thanks.

> Also, maybe this is the sort of thing that makes sense to split into
> general "APIs needed for reftable" patches. E.g. something like the
> below (just the strbuf.h change):

SGTM.
Ævar Arnfjörð Bjarmason April 13, 2021, 12:56 p.m. UTC | #3
On Tue, Apr 13 2021, Han-Wen Nienhuys wrote:

> On Tue, Apr 13, 2021 at 10:02 AM Ævar Arnfjörð Bjarmason
> <avarab@gmail.com> wrote:
>>
>>
>> On Mon, Apr 12 2021, Han-Wen Nienhuys via GitGitGadget wrote:
>>
>> > +int strbuf_add_void(void *b, const void *data, size_t sz)
>> > +{
>> > +     strbuf_add((struct strbuf *)b, data, sz);
>> > +     return sz;
>> > +}
>>
>> Is that cast needed on your compiler? This compiles without warnings for
>> me without that.
>
> No! thanks.

Some more, a combination of redundant casting and casting early to
another variable being easier to read. The latter being a matter of
style/opinion. Some more like that in the code:

    git grep -W '\(\(struct .*\*'

diff:

diff --git a/reftable/blocksource.c b/reftable/blocksource.c
index 25d4d95b52b..91eb0b495f5 100644
--- a/reftable/blocksource.c
+++ b/reftable/blocksource.c
@@ -26,7 +26,7 @@ static void strbuf_close(void *b)
 static int strbuf_read_block(void *v, struct reftable_block *dest, uint64_t off,
 			     uint32_t size)
 {
-	struct strbuf *b = (struct strbuf *)v;
+	struct strbuf *b = v;
 	assert(off + size <= b->len);
 	dest->data = reftable_calloc(size);
 	memcpy(dest->data, b->buf + off, size);
@@ -36,7 +36,8 @@ static int strbuf_read_block(void *v, struct reftable_block *dest, uint64_t off,
 
 static uint64_t strbuf_size(void *b)
 {
-	return ((struct strbuf *)b)->len;
+	struct strbuf *buf = b;
+	return buf->len;
 }
 
 static struct reftable_block_source_vtable strbuf_vtable = {
@@ -91,10 +92,11 @@ static void file_return_block(void *b, struct reftable_block *dest)
 
 static void file_close(void *b)
 {
-	int fd = ((struct file_block_source *)b)->fd;
+	struct file_block_source *bs =  b;
+	int fd = bs->fd;
 	if (fd > 0) {
 		close(fd);
-		((struct file_block_source *)b)->fd = 0;
+		bs->fd = 0;
 	}
 
 	reftable_free(b);
@@ -103,7 +105,7 @@ static void file_close(void *b)
 static int file_read_block(void *v, struct reftable_block *dest, uint64_t off,
 			   uint32_t size)
 {
-	struct file_block_source *b = (struct file_block_source *)v;
+	struct file_block_source *b = v;
 	assert(off + size <= b->size);
 	dest->data = reftable_malloc(size);
 	if (pread(b->fd, dest->data, size, off) != size)
Ævar Arnfjörð Bjarmason April 13, 2021, 1:14 p.m. UTC | #4
On Mon, Apr 12 2021, Han-Wen Nienhuys via GitGitGadget wrote:

> diff --git a/reftable/system.h b/reftable/system.h
> new file mode 100644
> index 000000000000..07277ca06273
> --- /dev/null
> +++ b/reftable/system.h
> @@ -0,0 +1,32 @@
> +/*
> +Copyright 2020 Google LLC
> +
> +Use of this source code is governed by a BSD-style
> +license that can be found in the LICENSE file or at
> +https://developers.google.com/open-source/licenses/bsd
> +*/
> +
> +#ifndef SYSTEM_H
> +#define SYSTEM_H
> +
> +#include "git-compat-util.h"
> +#include "strbuf.h"
> +
> +#include <zlib.h>
> +
> +struct strbuf;
> +/* In git, this is declared in dir.h */
> +int remove_dir_recursively(struct strbuf *path, int flags);
> +
> +#define SHA1_ID 0x73686131
> +#define SHA256_ID 0x73323536
> +#define SHA1_SIZE 20
> +#define SHA256_SIZE 32
> +
> +/* This is uncompress2, which is only available in zlib as of 2017.
> + */
> +int uncompress_return_consumed(Bytef *dest, uLongf *destLen,
> +			       const Bytef *source, uLong *sourceLen);
> +int hash_size(uint32_t id);

Related to the comment in
http://lore.kernel.org/git/87a6q2egvy.fsf@evledraar.gmail.com

I'd think that rather than duplicating magic constants & one thing in
dir.h we'd be better off having some leading patches splitting off the
relevant parts of object-file.c & dir.h, maybe THAT_NAME_minimal.h?

Or: why not simply include dir.h and object.h etc? The compiler/linker
will discard the unused functions, and if the worry is overuse of
git.git functions creeping in I'd think that would be better done via
some test/CI job that checks what objects were used.
Han-Wen Nienhuys April 15, 2021, 3 p.m. UTC | #5
On Tue, Apr 13, 2021 at 3:14 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
> I'd think that rather than duplicating magic constants & one thing in
> dir.h we'd be better off having some leading patches splitting off the
> relevant parts of object-file.c & dir.h, maybe THAT_NAME_minimal.h?
>
> Or: why not simply include dir.h and object.h etc? The compiler/linker
> will discard the unused functions, and if the worry is overuse of
> git.git functions creeping in I'd think that would be better done via
> some test/CI job that checks what objects were used.

It was useful to see how much (or how little) of Git I needed to make
it work, but it has served its purpose, so I followed your suggestion
to use dir.h and object.h.
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index ffc2ddfd930d..1a6528a40b51 100644
--- a/Makefile
+++ b/Makefile
@@ -735,6 +735,7 @@  TEST_BUILTINS_OBJS += test-read-cache.o
 TEST_BUILTINS_OBJS += test-read-graph.o
 TEST_BUILTINS_OBJS += test-read-midx.o
 TEST_BUILTINS_OBJS += test-ref-store.o
+TEST_BUILTINS_OBJS += test-reftable.o
 TEST_BUILTINS_OBJS += test-regex.o
 TEST_BUILTINS_OBJS += test-repository.o
 TEST_BUILTINS_OBJS += test-revision-walking.o
@@ -812,6 +813,8 @@  TEST_SHELL_PATH = $(SHELL_PATH)
 
 LIB_FILE = libgit.a
 XDIFF_LIB = xdiff/lib.a
+REFTABLE_LIB = reftable/libreftable.a
+REFTABLE_TEST_LIB = reftable/libreftable_test.a
 
 GENERATED_H += command-list.h
 GENERATED_H += config-list.h
@@ -1180,7 +1183,7 @@  THIRD_PARTY_SOURCES += compat/regex/%
 THIRD_PARTY_SOURCES += sha1collisiondetection/%
 THIRD_PARTY_SOURCES += sha1dc/%
 
-GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB)
+GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB)
 EXTLIBS =
 
 GIT_USER_AGENT = git/$(GIT_VERSION)
@@ -2397,7 +2400,15 @@  XDIFF_OBJS += xdiff/xutils.o
 .PHONY: xdiff-objs
 xdiff-objs: $(XDIFF_OBJS)
 
+REFTABLE_OBJS += reftable/basics.o
+REFTABLE_OBJS += reftable/error.o
+REFTABLE_OBJS += reftable/publicbasics.o
+
+REFTABLE_TEST_OBJS += reftable/test_framework.o
+REFTABLE_TEST_OBJS += reftable/basics_test.o
+
 TEST_OBJS := $(patsubst %$X,%.o,$(TEST_PROGRAMS)) $(patsubst %,t/helper/%,$(TEST_BUILTINS_OBJS))
+
 .PHONY: test-objs
 test-objs: $(TEST_OBJS)
 
@@ -2413,6 +2424,8 @@  OBJECTS += $(PROGRAM_OBJS)
 OBJECTS += $(TEST_OBJS)
 OBJECTS += $(XDIFF_OBJS)
 OBJECTS += $(FUZZ_OBJS)
+OBJECTS += $(REFTABLE_OBJS) $(REFTABLE_TEST_OBJS)
+
 ifndef NO_CURL
 	OBJECTS += http.o http-walker.o remote-curl.o
 endif
@@ -2564,6 +2577,12 @@  $(LIB_FILE): $(LIB_OBJS)
 $(XDIFF_LIB): $(XDIFF_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
 
+$(REFTABLE_LIB): $(REFTABLE_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
+
+$(REFTABLE_TEST_LIB): $(REFTABLE_TEST_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
+
 export DEFAULT_EDITOR DEFAULT_PAGER
 
 Documentation/GIT-EXCLUDED-PROGRAMS: FORCE
@@ -2844,7 +2863,7 @@  perf: all
 
 t/helper/test-tool$X: $(patsubst %,t/helper/%,$(TEST_BUILTINS_OBJS))
 
-t/helper/test-%$X: t/helper/test-%.o GIT-LDFLAGS $(GITLIBS)
+t/helper/test-%$X: t/helper/test-%.o GIT-LDFLAGS $(GITLIBS) $(REFTABLE_TEST_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
 
 check-sha1:: t/helper/test-tool$X
@@ -3174,7 +3193,7 @@  cocciclean:
 clean: profile-clean coverage-clean cocciclean
 	$(RM) *.res
 	$(RM) $(OBJECTS)
-	$(RM) $(LIB_FILE) $(XDIFF_LIB)
+	$(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB)
 	$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) $(FUZZ_PROGRAMS)
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 75ed198a6a36..f41d3dab9015 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -611,6 +611,12 @@  parse_makefile_for_sources(libxdiff_SOURCES "XDIFF_OBJS")
 list(TRANSFORM libxdiff_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
 add_library(xdiff STATIC ${libxdiff_SOURCES})
 
+#reftable
+parse_makefile_for_sources(reftable_SOURCES "REFTABLE_OBJS")
+
+list(TRANSFORM reftable_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
+add_library(reftable STATIC ${reftable_SOURCES})
+
 if(WIN32)
 	if(NOT MSVC)#use windres when compiling with gcc and clang
 		add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/git.res
@@ -633,7 +639,7 @@  endif()
 #link all required libraries to common-main
 add_library(common-main OBJECT ${CMAKE_SOURCE_DIR}/common-main.c)
 
-target_link_libraries(common-main libgit xdiff ${ZLIB_LIBRARIES})
+target_link_libraries(common-main libgit xdiff reftable ${ZLIB_LIBRARIES})
 if(Intl_FOUND)
 	target_link_libraries(common-main ${Intl_LIBRARIES})
 endif()
@@ -873,11 +879,15 @@  if(BUILD_TESTING)
 add_executable(test-fake-ssh ${CMAKE_SOURCE_DIR}/t/helper/test-fake-ssh.c)
 target_link_libraries(test-fake-ssh common-main)
 
+#reftable-tests
+parse_makefile_for_sources(test-reftable_SOURCES "REFTABLE_TEST_OBJS")
+list(TRANSFORM test-reftable_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
+
 #test-tool
 parse_makefile_for_sources(test-tool_SOURCES "TEST_BUILTINS_OBJS")
 
 list(TRANSFORM test-tool_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/t/helper/")
-add_executable(test-tool ${CMAKE_SOURCE_DIR}/t/helper/test-tool.c ${test-tool_SOURCES})
+add_executable(test-tool ${CMAKE_SOURCE_DIR}/t/helper/test-tool.c ${test-tool_SOURCES} ${test-reftable_SOURCES})
 target_link_libraries(test-tool common-main)
 
 set_target_properties(test-fake-ssh test-tool
diff --git a/reftable/basics.c b/reftable/basics.c
new file mode 100644
index 000000000000..abd027b98882
--- /dev/null
+++ b/reftable/basics.c
@@ -0,0 +1,128 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "basics.h"
+
+void put_be24(uint8_t *out, uint32_t i)
+{
+	out[0] = (uint8_t)((i >> 16) & 0xff);
+	out[1] = (uint8_t)((i >> 8) & 0xff);
+	out[2] = (uint8_t)(i & 0xff);
+}
+
+uint32_t get_be24(uint8_t *in)
+{
+	return (uint32_t)(in[0]) << 16 | (uint32_t)(in[1]) << 8 |
+	       (uint32_t)(in[2]);
+}
+
+void put_be16(uint8_t *out, uint16_t i)
+{
+	out[0] = (uint8_t)((i >> 8) & 0xff);
+	out[1] = (uint8_t)(i & 0xff);
+}
+
+int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
+{
+	size_t lo = 0;
+	size_t hi = sz;
+
+	/* Invariants:
+	 *
+	 *  (hi == sz) || f(hi) == true
+	 *  (lo == 0 && f(0) == true) || fi(lo) == false
+	 */
+	while (hi - lo > 1) {
+		size_t mid = lo + (hi - lo) / 2;
+
+		if (f(mid, args))
+			hi = mid;
+		else
+			lo = mid;
+	}
+
+	if (lo)
+		return hi;
+
+	return f(0, args) ? 0 : 1;
+}
+
+void free_names(char **a)
+{
+	char **p;
+	if (a == NULL) {
+		return;
+	}
+	for (p = a; *p; p++) {
+		reftable_free(*p);
+	}
+	reftable_free(a);
+}
+
+int names_length(char **names)
+{
+	char **p = names;
+	for (; *p; p++) {
+		/* empty */
+	}
+	return p - names;
+}
+
+void parse_names(char *buf, int size, char ***namesp)
+{
+	char **names = NULL;
+	size_t names_cap = 0;
+	size_t names_len = 0;
+
+	char *p = buf;
+	char *end = buf + size;
+	while (p < end) {
+		char *next = strchr(p, '\n');
+		if (next && next < end) {
+			*next = 0;
+		} else {
+			next = end;
+		}
+		if (p < next) {
+			if (names_len == names_cap) {
+				names_cap = 2 * names_cap + 1;
+				names = reftable_realloc(
+					names, names_cap * sizeof(*names));
+			}
+			names[names_len++] = xstrdup(p);
+		}
+		p = next + 1;
+	}
+
+	names = reftable_realloc(names, (names_len + 1) * sizeof(*names));
+	names[names_len] = NULL;
+	*namesp = names;
+}
+
+int names_equal(char **a, char **b)
+{
+	int i = 0;
+	for (; a[i] && b[i]; i++) {
+		if (strcmp(a[i], b[i])) {
+			return 0;
+		}
+	}
+
+	return a[i] == b[i];
+}
+
+int common_prefix_size(struct strbuf *a, struct strbuf *b)
+{
+	int p = 0;
+	for (; p < a->len && p < b->len; p++) {
+		if (a->buf[p] != b->buf[p])
+			break;
+	}
+
+	return p;
+}
diff --git a/reftable/basics.h b/reftable/basics.h
new file mode 100644
index 000000000000..096b36862b9f
--- /dev/null
+++ b/reftable/basics.h
@@ -0,0 +1,60 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef BASICS_H
+#define BASICS_H
+
+/*
+ * miscellaneous utilities that are not provided by Git.
+ */
+
+#include "system.h"
+
+/* Bigendian en/decoding of integers */
+
+void put_be24(uint8_t *out, uint32_t i);
+uint32_t get_be24(uint8_t *in);
+void put_be16(uint8_t *out, uint16_t i);
+
+/*
+ * find smallest index i in [0, sz) at which f(i) is true, assuming
+ * that f is ascending. Return sz if f(i) is false for all indices.
+ *
+ * Contrary to bsearch(3), this returns something useful if the argument is not
+ * found.
+ */
+int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args);
+
+/*
+ * Frees a NULL terminated array of malloced strings. The array itself is also
+ * freed.
+ */
+void free_names(char **a);
+
+/* parse a newline separated list of names. `size` is the length of the buffer,
+ * without terminating '\0'. Empty names are discarded. */
+void parse_names(char *buf, int size, char ***namesp);
+
+/* compares two NULL-terminated arrays of strings. */
+int names_equal(char **a, char **b);
+
+/* returns the array size of a NULL-terminated array of strings. */
+int names_length(char **names);
+
+/* Allocation routines; they invoke the functions set through
+ * reftable_set_alloc() */
+void *reftable_malloc(size_t sz);
+void *reftable_realloc(void *p, size_t sz);
+void reftable_free(void *p);
+void *reftable_calloc(size_t sz);
+
+/* Find the longest shared prefix size of `a` and `b` */
+struct strbuf;
+int common_prefix_size(struct strbuf *a, struct strbuf *b);
+
+#endif
diff --git a/reftable/basics_test.c b/reftable/basics_test.c
new file mode 100644
index 000000000000..6d52f0f9d5aa
--- /dev/null
+++ b/reftable/basics_test.c
@@ -0,0 +1,98 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "system.h"
+
+#include "basics.h"
+#include "test_framework.h"
+#include "reftable-tests.h"
+
+struct binsearch_args {
+	int key;
+	int *arr;
+};
+
+static int binsearch_func(size_t i, void *void_args)
+{
+	struct binsearch_args *args = (struct binsearch_args *)void_args;
+
+	return args->key < args->arr[i];
+}
+
+static void test_binsearch(void)
+{
+	int arr[] = { 2, 4, 6, 8, 10 };
+	size_t sz = ARRAY_SIZE(arr);
+	struct binsearch_args args = {
+		.arr = arr,
+	};
+
+	int i = 0;
+	for (i = 1; i < 11; i++) {
+		int res;
+		args.key = i;
+		res = binsearch(sz, &binsearch_func, &args);
+
+		if (res < sz) {
+			EXPECT(args.key < arr[res]);
+			if (res > 0) {
+				EXPECT(args.key >= arr[res - 1]);
+			}
+		} else {
+			EXPECT(args.key == 10 || args.key == 11);
+		}
+	}
+}
+
+static void test_names_length(void)
+{
+	char *a[] = { "a", "b", NULL };
+	EXPECT(names_length(a) == 2);
+}
+
+static void test_parse_names_normal(void)
+{
+	char in[] = "a\nb\n";
+	char **out = NULL;
+	parse_names(in, strlen(in), &out);
+	EXPECT(!strcmp(out[0], "a"));
+	EXPECT(!strcmp(out[1], "b"));
+	EXPECT(out[2] == NULL);
+	free_names(out);
+}
+
+static void test_parse_names_drop_empty(void)
+{
+	char in[] = "a\n\n";
+	char **out = NULL;
+	parse_names(in, strlen(in), &out);
+	EXPECT(!strcmp(out[0], "a"));
+	EXPECT(out[1] == NULL);
+	free_names(out);
+}
+
+static void test_common_prefix(void)
+{
+	struct strbuf s1 = STRBUF_INIT;
+	struct strbuf s2 = STRBUF_INIT;
+	strbuf_addstr(&s1, "abcdef");
+	strbuf_addstr(&s2, "abc");
+	EXPECT(common_prefix_size(&s1, &s2) == 3);
+	strbuf_release(&s1);
+	strbuf_release(&s2);
+}
+
+int basics_test_main(int argc, const char *argv[])
+{
+	RUN_TEST(test_common_prefix);
+	RUN_TEST(test_parse_names_normal);
+	RUN_TEST(test_parse_names_drop_empty);
+	RUN_TEST(test_binsearch);
+	RUN_TEST(test_names_length);
+	return 0;
+}
diff --git a/reftable/publicbasics.c b/reftable/publicbasics.c
new file mode 100644
index 000000000000..25639f61af61
--- /dev/null
+++ b/reftable/publicbasics.c
@@ -0,0 +1,58 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "reftable-malloc.h"
+
+#include "basics.h"
+#include "system.h"
+
+static void *(*reftable_malloc_ptr)(size_t sz) = &malloc;
+static void *(*reftable_realloc_ptr)(void *, size_t) = &realloc;
+static void (*reftable_free_ptr)(void *) = &free;
+
+void *reftable_malloc(size_t sz)
+{
+	return (*reftable_malloc_ptr)(sz);
+}
+
+void *reftable_realloc(void *p, size_t sz)
+{
+	return (*reftable_realloc_ptr)(p, sz);
+}
+
+void reftable_free(void *p)
+{
+	reftable_free_ptr(p);
+}
+
+void *reftable_calloc(size_t sz)
+{
+	void *p = reftable_malloc(sz);
+	memset(p, 0, sz);
+	return p;
+}
+
+void reftable_set_alloc(void *(*malloc)(size_t),
+			void *(*realloc)(void *, size_t), void (*free)(void *))
+{
+	reftable_malloc_ptr = malloc;
+	reftable_realloc_ptr = realloc;
+	reftable_free_ptr = free;
+}
+
+int hash_size(uint32_t id)
+{
+	switch (id) {
+	case 0:
+	case SHA1_ID:
+		return SHA1_SIZE;
+	case SHA256_ID:
+		return SHA256_SIZE;
+	}
+	abort();
+}
diff --git a/reftable/reftable-malloc.h b/reftable/reftable-malloc.h
new file mode 100644
index 000000000000..5f2185f1f343
--- /dev/null
+++ b/reftable/reftable-malloc.h
@@ -0,0 +1,18 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef REFTABLE_H
+#define REFTABLE_H
+
+#include <stddef.h>
+
+/* Overrides the functions to use for memory management. */
+void reftable_set_alloc(void *(*malloc)(size_t),
+			void *(*realloc)(void *, size_t), void (*free)(void *));
+
+#endif
diff --git a/reftable/reftable-tests.h b/reftable/reftable-tests.h
new file mode 100644
index 000000000000..5e7698ae654e
--- /dev/null
+++ b/reftable/reftable-tests.h
@@ -0,0 +1,22 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef REFTABLE_TESTS_H
+#define REFTABLE_TESTS_H
+
+int basics_test_main(int argc, const char **argv);
+int block_test_main(int argc, const char **argv);
+int merged_test_main(int argc, const char **argv);
+int record_test_main(int argc, const char **argv);
+int refname_test_main(int argc, const char **argv);
+int reftable_test_main(int argc, const char **argv);
+int stack_test_main(int argc, const char **argv);
+int tree_test_main(int argc, const char **argv);
+int reftable_dump_main(int argc, char *const *argv);
+
+#endif
diff --git a/reftable/system.h b/reftable/system.h
new file mode 100644
index 000000000000..07277ca06273
--- /dev/null
+++ b/reftable/system.h
@@ -0,0 +1,32 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include "git-compat-util.h"
+#include "strbuf.h"
+
+#include <zlib.h>
+
+struct strbuf;
+/* In git, this is declared in dir.h */
+int remove_dir_recursively(struct strbuf *path, int flags);
+
+#define SHA1_ID 0x73686131
+#define SHA256_ID 0x73323536
+#define SHA1_SIZE 20
+#define SHA256_SIZE 32
+
+/* This is uncompress2, which is only available in zlib as of 2017.
+ */
+int uncompress_return_consumed(Bytef *dest, uLongf *destLen,
+			       const Bytef *source, uLong *sourceLen);
+int hash_size(uint32_t id);
+
+#endif
diff --git a/reftable/test_framework.c b/reftable/test_framework.c
new file mode 100644
index 000000000000..a5ff4e2a2d2f
--- /dev/null
+++ b/reftable/test_framework.c
@@ -0,0 +1,23 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "system.h"
+#include "test_framework.h"
+
+#include "basics.h"
+
+void set_test_hash(uint8_t *p, int i)
+{
+	memset(p, (uint8_t)i, hash_size(SHA1_ID));
+}
+
+int strbuf_add_void(void *b, const void *data, size_t sz)
+{
+	strbuf_add((struct strbuf *)b, data, sz);
+	return sz;
+}
diff --git a/reftable/test_framework.h b/reftable/test_framework.h
new file mode 100644
index 000000000000..5fdc9519a5a5
--- /dev/null
+++ b/reftable/test_framework.h
@@ -0,0 +1,53 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef TEST_FRAMEWORK_H
+#define TEST_FRAMEWORK_H
+
+#include "system.h"
+#include "reftable-error.h"
+
+#define EXPECT_ERR(c)                                                  \
+	if (c != 0) {                                                  \
+		fflush(stderr);                                        \
+		fflush(stdout);                                        \
+		fprintf(stderr, "%s: %d: error == %d (%s), want 0\n",  \
+			__FILE__, __LINE__, c, reftable_error_str(c)); \
+		abort();                                               \
+	}
+
+#define EXPECT_STREQ(a, b)                                               \
+	if (strcmp(a, b)) {                                              \
+		fflush(stderr);                                          \
+		fflush(stdout);                                          \
+		fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \
+			__LINE__, #a, a, #b, b);                         \
+		abort();                                                 \
+	}
+
+#define EXPECT(c)                                                          \
+	if (!(c)) {                                                        \
+		fflush(stderr);                                            \
+		fflush(stdout);                                            \
+		fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \
+			__LINE__, #c);                                     \
+		abort();                                                   \
+	}
+
+#define RUN_TEST(f)                          \
+	fprintf(stderr, "running %s\n", #f); \
+	fflush(stderr);                      \
+	f();
+
+void set_test_hash(uint8_t *p, int i);
+
+/* Like strbuf_add, but suitable for passing to reftable_new_writer
+ */
+int strbuf_add_void(void *b, const void *data, size_t sz);
+
+#endif
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
new file mode 100644
index 000000000000..3b58e423e7b1
--- /dev/null
+++ b/t/helper/test-reftable.c
@@ -0,0 +1,9 @@ 
+#include "reftable/reftable-tests.h"
+#include "test-tool.h"
+
+int cmd__reftable(int argc, const char **argv)
+{
+	basics_test_main(argc, argv);
+
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 287aa6002307..814d2606f8ca 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -49,13 +49,14 @@  static struct test_cmd cmds[] = {
 	{ "pcre2-config", cmd__pcre2_config },
 	{ "pkt-line", cmd__pkt_line },
 	{ "prio-queue", cmd__prio_queue },
-	{ "proc-receive", cmd__proc_receive},
+	{ "proc-receive", cmd__proc_receive },
 	{ "progress", cmd__progress },
 	{ "reach", cmd__reach },
 	{ "read-cache", cmd__read_cache },
 	{ "read-graph", cmd__read_graph },
 	{ "read-midx", cmd__read_midx },
 	{ "ref-store", cmd__ref_store },
+	{ "reftable", cmd__reftable },
 	{ "regex", cmd__regex },
 	{ "repository", cmd__repository },
 	{ "revision-walking", cmd__revision_walking },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 9ea4b31011dd..789c26f302dd 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -45,6 +45,7 @@  int cmd__read_cache(int argc, const char **argv);
 int cmd__read_graph(int argc, const char **argv);
 int cmd__read_midx(int argc, const char **argv);
 int cmd__ref_store(int argc, const char **argv);
+int cmd__reftable(int argc, const char **argv);
 int cmd__regex(int argc, const char **argv);
 int cmd__repository(int argc, const char **argv);
 int cmd__revision_walking(int argc, const char **argv);
diff --git a/t/t0032-reftable-unittest.sh b/t/t0032-reftable-unittest.sh
new file mode 100755
index 000000000000..0ed14971a580
--- /dev/null
+++ b/t/t0032-reftable-unittest.sh
@@ -0,0 +1,15 @@ 
+#!/bin/sh
+#
+# Copyright (c) 2020 Google LLC
+#
+
+test_description='reftable unittests'
+
+. ./test-lib.sh
+
+test_expect_success 'unittests' '
+	TMPDIR=$(pwd) && export TMPDIR &&
+	test-tool reftable
+'
+
+test_done