diff mbox series

[v2,05/13] reftable: utility functions

Message ID 4190da597e65bce072fa3c37c9410a56def4b489.1601568663.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series reftable library | expand

Commit Message

Johannes Schindelin via GitGitGadget Oct. 1, 2020, 4:10 p.m. UTC
From: Han-Wen Nienhuys <hanwen@google.com>

This commit provides basic utility classes for the reftable library.

Since the reftable library must compile standalone, there may be some overlap
with git-core utility functions.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
---
 Makefile                  |  26 ++++++-
 reftable/basics.c         | 131 +++++++++++++++++++++++++++++++++
 reftable/basics.h         |  48 +++++++++++++
 reftable/blocksource.c    | 148 ++++++++++++++++++++++++++++++++++++++
 reftable/blocksource.h    |  22 ++++++
 reftable/compat.c         | 110 ++++++++++++++++++++++++++++
 reftable/compat.h         |  48 +++++++++++++
 reftable/publicbasics.c   | 100 ++++++++++++++++++++++++++
 reftable/reftable-tests.h |  22 ++++++
 reftable/strbuf.c         | 142 ++++++++++++++++++++++++++++++++++++
 reftable/strbuf.h         |  80 +++++++++++++++++++++
 reftable/strbuf_test.c    |  37 ++++++++++
 reftable/system.h         |  51 +++++++++++++
 t/helper/test-reftable.c  |   8 +++
 t/helper/test-tool.c      |   1 +
 t/helper/test-tool.h      |   1 +
 16 files changed, 972 insertions(+), 3 deletions(-)
 create mode 100644 reftable/basics.c
 create mode 100644 reftable/basics.h
 create mode 100644 reftable/blocksource.c
 create mode 100644 reftable/blocksource.h
 create mode 100644 reftable/compat.c
 create mode 100644 reftable/compat.h
 create mode 100644 reftable/publicbasics.c
 create mode 100644 reftable/reftable-tests.h
 create mode 100644 reftable/strbuf.c
 create mode 100644 reftable/strbuf.h
 create mode 100644 reftable/strbuf_test.c
 create mode 100644 reftable/system.h
 create mode 100644 t/helper/test-reftable.c

Comments

Jonathan Nieder Oct. 2, 2020, 4:12 a.m. UTC | #1
Han-Wen Nienhuys wrote:

> --- /dev/null
> +++ b/reftable/basics.c
> @@ -0,0 +1,131 @@
> +/*
> +Copyright 2020 Google LLC
> +
> +Use of this source code is governed by a BSD-style
> +license that can be found in the LICENSE file or at
> +https://developers.google.com/open-source/licenses/bsd
> +*/
> +
> +#include "basics.h"

Git's source files start with #include-ing "git-compat-util.h"; how do
we approach that here?

E.g. would we pass -include on the command line, or could we #include
some kind of compat-util.h that #include-s "git-compat-util.h" in Git
and does the appropriate steps for enabling feature test macros and
including system headers when used outside?

[...]
> +int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)

How does this compare to stdlib's bsearch?

[...]
> +void free_names(char **a)
> +{
> +	char **p = a;
> +	if (p == NULL) {
> +		return;
> +	}
> +	while (*p) {
> +		reftable_free(*p);
> +		p++;
> +	}
> +	reftable_free(a);
> +}

Are there other callers that need custom free?

[...]
> +int names_length(char **names)
> +{
> +	int len = 0;
> +	char **p = names;
> +	while (*p) {
> +		p++;
> +		len++;
> +	}
> +	return len;
> +}

The rest are probably easier to evaluate when I look at the callers, and
it's time for me to go to sleep now.  I'll try to find some time soon to
pick the review back up.

Thanks for a thoughtfully put together library so far.

Jonathan
Johannes Schindelin Oct. 2, 2020, 2:01 p.m. UTC | #2
Hi Han-Wen,

On Thu, 1 Oct 2020, Han-Wen Nienhuys via GitGitGadget wrote:

> From: Han-Wen Nienhuys <hanwen@google.com>
>
> This commit provides basic utility classes for the reftable library.
>
> Since the reftable library must compile standalone, there may be some overlap
> with git-core utility functions.

My position on this idea to duplicate functionality in order to somehow
pretend that the reftable code is independent of Git's source code has not
changed.

Be that as it may, the CI build failures impacted my notifications'
signal/noise ratio too much, so here goes (Junio, I would be delighted if
you could apply this on top of your branch):

-- snipsnap --
From 1698c3450b5e47927c2e8fe35cad2634963bad89 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Wed, 30 Sep 2020 16:53:53 +0200
Subject: [PATCH] fixup??? reftable: utility functions

Let's not forget our CMake configuration.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 contrib/buildsystems/CMakeLists.txt | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 1bd53c4ad51..284bf4402d7 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -591,6 +591,12 @@ parse_makefile_for_sources(libxdiff_SOURCES "XDIFF_OBJS")
 list(TRANSFORM libxdiff_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
 add_library(xdiff STATIC ${libxdiff_SOURCES})

+#reftable
+parse_makefile_for_sources(reftable_SOURCES "REFTABLE_OBJS")
+
+list(TRANSFORM reftable_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
+add_library(reftable STATIC ${reftable_SOURCES})
+
 if(WIN32)
 	if(NOT MSVC)#use windres when compiling with gcc and clang
 		add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/git.res
@@ -613,7 +619,7 @@ endif()
 #link all required libraries to common-main
 add_library(common-main OBJECT ${CMAKE_SOURCE_DIR}/common-main.c)

-target_link_libraries(common-main libgit xdiff ${ZLIB_LIBRARIES})
+target_link_libraries(common-main libgit xdiff reftable ${ZLIB_LIBRARIES})
 if(Intl_FOUND)
 	target_link_libraries(common-main ${Intl_LIBRARIES})
 endif()
@@ -848,11 +854,15 @@ if(BUILD_TESTING)
 add_executable(test-fake-ssh ${CMAKE_SOURCE_DIR}/t/helper/test-fake-ssh.c)
 target_link_libraries(test-fake-ssh common-main)

+#reftable-tests
+parse_makefile_for_sources(test-reftable_SOURCES "REFTABLE_TEST_OBJS")
+list(TRANSFORM test-reftable_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/")
+
 #test-tool
 parse_makefile_for_sources(test-tool_SOURCES "TEST_BUILTINS_OBJS")

 list(TRANSFORM test-tool_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/t/helper/")
-add_executable(test-tool ${CMAKE_SOURCE_DIR}/t/helper/test-tool.c ${test-tool_SOURCES})
+add_executable(test-tool ${CMAKE_SOURCE_DIR}/t/helper/test-tool.c ${test-tool_SOURCES} ${test-reftable_SOURCES})
 target_link_libraries(test-tool common-main)

 set_target_properties(test-fake-ssh test-tool
--
2.28.0.windows.1.18.g5300e52e185
Junio C Hamano Oct. 2, 2020, 8:47 p.m. UTC | #3
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> This commit provides basic utility classes for the reftable library.
>>
>> Since the reftable library must compile standalone, there may be some overlap
>> with git-core utility functions.
>
> My position on this idea to duplicate functionality in order to somehow
> pretend that the reftable code is independent of Git's source code has not
> changed.

The above may be sufficient between you and Han-Wen, and a few
selected others who still remember, but please do not forget that we
are collaborating in the open.  It would help those who are learning
open source interactions by watching from sidelines, if you spelled
out what your position is or at least left a pointer to a previous
discussion.

FWIW, I think it is a mistake to try to make this directory so that
it can be lifted out of our code base and used independently, as it
has to create unnecessary friction to the code when used here.  It
is not like other code that we are not their primary intended
audience and we simply "borrow" from them (e.g. xdiff/ & sha1dc/).

The previous paragraph agrees with my guess of your position, but
perhaps you have something else in mind.  I dunno.

> Be that as it may, the CI build failures impacted my notifications'
> signal/noise ratio too much, so here goes (Junio, I would be delighted if
> you could apply this on top of your branch):

Quite honestly, this close to the first preview release for 2.29,
I'd rather drop this round from 'seen' and expect an updated topic,
than piling fixup on top of fixups.

Thanks.
Johannes Schindelin Oct. 3, 2020, 8:07 a.m. UTC | #4
Hi Junio,

On Fri, 2 Oct 2020, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> >> This commit provides basic utility classes for the reftable library.
> >>
> >> Since the reftable library must compile standalone, there may be some overlap
> >> with git-core utility functions.
> >
> > My position on this idea to duplicate functionality in order to somehow
> > pretend that the reftable code is independent of Git's source code has not
> > changed.
>
> The above may be sufficient between you and Han-Wen, and a few
> selected others who still remember, but please do not forget that we
> are collaborating in the open.  It would help those who are learning
> open source interactions by watching from sidelines, if you spelled
> out what your position is or at least left a pointer to a previous
> discussion.

You're right, sorry.

> FWIW, I think it is a mistake to try to make this directory so that
> it can be lifted out of our code base and used independently, as it
> has to create unnecessary friction to the code when used here.  It
> is not like other code that we are not their primary intended
> audience and we simply "borrow" from them (e.g. xdiff/ & sha1dc/).
>
> The previous paragraph agrees with my guess of your position, but
> perhaps you have something else in mind.  I dunno.

Yes, you were spot on.

I am rather strongly opposed to introducing duplicated functionality.
Rather than spending time on finding half a dozen links to conversations
in the past, let me try to make the case afresh:

I have a very concrete example of the undesirable consequences: the
commit-graph breakage Stolee and I debugged yesterday, where a bug hid in
`in_merge_bases_many()` for two years, hidden by the fact that there was
_duplicate logic_ in `get_reachable_subset()`.

Pretty much _all_ of the code in `reftable/` is _very_ new, has not seen
any real-world testing, and as such cannot really be trusted, in
particular the code that duplicates functionality already present in
libgit.a. Note that I do not claim that Han-Wen cannot be trusted: to the
contrary, I trust him very much (we have a history going all the way back
to Lilypond, the musical typesetter). The code, however, cannot be
trusted, and needs to be reviewed and tested in practice.

The simple fact is that even the thorough review that the commit-graph
patches received did not prevent the `min_generation`/`max_generation` bug
in `in_merge_bases_many()` from entering the code base, and if we had not
had _another_ function doing very, very similar things, that bug would
most likely not have lingered for _this_ long.

Likewise for the reimplementation of the convenient functionality like
`strbuf`. This kind of duplication (in the case of `struct strbuf`, quite
_literally_: there are now confusingly _two_ `strbuf.h` files that even
try to implement the _exact same_ API) is _prone_ to lead to stale, or
long undiscovered, bugs in one of the duplicated implementations.

Which means that we're likely to introduce bugs with those new functions
introduced in `reftable/`. For the reftable functionality, that has to be
accepted. For functions that do the same as equivalents in libgit.a, we do
not have to accept it.

In any case, duplicated functionality is a maintenance burden, and
especially in this case, an unnecessary one at that: unlike xdiff or
compat/regex/, the reftable functionality is co-dependent with `libgit.a`.
Yes, you can implement libreftable as a stand-alone library (including
duplicated functionality from libgit.a, as I pointed out), but you _cannot
use it independently of Git_.

The reftable concerns _Git_ refs.

This is not a compressor that could come in handy in a messenger app, or a
diff engine that could potentially help with a semantic merging tool.

It is code whose entire purpose is to manage boatloads of Git refs
efficiently. Therefore, I see no convincing reason to insist on
maintaining this code outside of git/git.

It would make more sense to maintain parse-options or strbuf or
t/test-lib.sh outside of git/git (because at least that functionality is
independent enough of Git to be potentially useful to other projects) and
we don't. We maintain them inside git/git, and for good reason: it makes
development and maintenance a hell of a lot easier.

And once we agree that reftable is intended to be so integral a part of
Git that it should absolutely have its home inside git/git, all of that
duplicated functionality (with almost totally untested implementations, at
least when it comes to "battle testing in production") can totally
evaporate and does not need to worry us any more.

And then we can start to spend reviewers' time on the actual code
revolving around the actual reftables rather than having to bother with
reviewing, say, a close, but non-identical `strbuf` that might even have
"borrowed" parts of the `strbuf` code and put it under a more permissive
license (I don't know, and I don't want to know, that's part of the reason
why I don't want to review the reftable patches in their current form).

In other words, we can then finally start to actually review the reftable
code on its merits of teaching Git a valuable new feature.

> > Be that as it may, the CI build failures impacted my notifications'
> > signal/noise ratio too much, so here goes (Junio, I would be delighted if
> > you could apply this on top of your branch):
>
> Quite honestly, this close to the first preview release for 2.29,
> I'd rather drop this round from 'seen' and expect an updated topic,
> than piling fixup on top of fixups.

Sounds good to me.

Ciao,
Dscho
Jonathan Tan Oct. 8, 2020, 1:48 a.m. UTC | #5
> From: Han-Wen Nienhuys <hanwen@google.com>
> 
> This commit provides basic utility classes for the reftable library.
> 
> Since the reftable library must compile standalone, there may be some overlap
> with git-core utility functions.
> 
> Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
> ---
>  Makefile                  |  26 ++++++-
>  reftable/basics.c         | 131 +++++++++++++++++++++++++++++++++
>  reftable/basics.h         |  48 +++++++++++++
>  reftable/blocksource.c    | 148 ++++++++++++++++++++++++++++++++++++++
>  reftable/blocksource.h    |  22 ++++++
>  reftable/compat.c         | 110 ++++++++++++++++++++++++++++
>  reftable/compat.h         |  48 +++++++++++++
>  reftable/publicbasics.c   | 100 ++++++++++++++++++++++++++
>  reftable/reftable-tests.h |  22 ++++++
>  reftable/strbuf.c         | 142 ++++++++++++++++++++++++++++++++++++
>  reftable/strbuf.h         |  80 +++++++++++++++++++++
>  reftable/strbuf_test.c    |  37 ++++++++++
>  reftable/system.h         |  51 +++++++++++++
>  t/helper/test-reftable.c  |   8 +++
>  t/helper/test-tool.c      |   1 +
>  t/helper/test-tool.h      |   1 +

I think duplicating things like strbuf is an unnecessary burden if Git
is to maintain this library. Something like "reftable will only import
git-compat-util.h and strbuf.h, and any project that wants to use
reftable must make sure that these functions and data structures are
available" would be more plausible.
Han-Wen Nienhuys Oct. 10, 2020, 5:28 p.m. UTC | #6
On Thu, Oct 8, 2020 at 3:48 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> > From: Han-Wen Nienhuys <hanwen@google.com>
> >
> > This commit provides basic utility classes for the reftable library.
> >
> > Since the reftable library must compile standalone, there may be some overlap
> > with git-core utility functions.

> I think duplicating things like strbuf is an unnecessary burden if Git
> is to maintain this library. Something like "reftable will only import
> git-compat-util.h and strbuf.h, and any project that wants to use
> reftable must make sure that these functions and data structures are
> available" would be more plausible.

Sure, but how do we ensure that the directory won't take on
dependencies beyond these headers? I am worried that I will be
involved in a tedious back & forth process to keep updates going into
libgit2 and/or also have to keep maintaining
github.com/google/reftable.

FWIW, the duplication is really tiny: according to

 $ wc $(grep -l REFTABLE_STANDALONE *[ch])

it's just 431 lines of code.

--
Han-Wen Nienhuys - Google Munich
I work 80%. Don't expect answers from me on Fridays.
--
Google Germany GmbH, Erika-Mann-Strasse 33, 80636 Munich
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
Han-Wen Nienhuys Oct. 10, 2020, 5:32 p.m. UTC | #7
On Fri, Oct 2, 2020 at 6:12 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
> [...]
> > +int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
>
> How does this compare to stdlib's bsearch?

bsearch gives you back NULL if it doesn't find an exact match.

> > +     reftable_free(a);
> > +}
>
> Are there other callers that need custom free?

The libgit2 folks requested the ability to set memory allocation
routines, hence reftable_free().
Johannes Schindelin Oct. 11, 2020, 10:52 a.m. UTC | #8
Hi Han-Wen,

On Sat, 10 Oct 2020, Han-Wen Nienhuys wrote:

> On Thu, Oct 8, 2020 at 3:48 AM Jonathan Tan <jonathantanmy@google.com> wrote:
> >
> > > From: Han-Wen Nienhuys <hanwen@google.com>
> > >
> > > This commit provides basic utility classes for the reftable library.
> > >
> > > Since the reftable library must compile standalone, there may be some overlap
> > > with git-core utility functions.
>
> > I think duplicating things like strbuf is an unnecessary burden if Git
> > is to maintain this library. Something like "reftable will only import
> > git-compat-util.h and strbuf.h, and any project that wants to use
> > reftable must make sure that these functions and data structures are
> > available" would be more plausible.
>
> Sure, but how do we ensure that the directory won't take on
> dependencies beyond these headers? I am worried that I will be
> involved in a tedious back & forth process to keep updates going into
> libgit2 and/or also have to keep maintaining
> github.com/google/reftable.
>
> FWIW, the duplication is really tiny: according to
>
>  $ wc $(grep -l REFTABLE_STANDALONE *[ch])
>
> it's just 431 lines of code.

The `merge_bases_many()` function has only 33 lines of code, partially
duplicating `get_reachable_subset()`. Yet, it had a bug in it for two
years that was not found.

How much worse will the situation be with your 431 lines of code.

Even more so when you consider the fact that you intend to shove the same
duplication down libgit2's throat. It's "triplicating" code.

So I find the argument you made above quite unconvincing.

Ciao,
Dscho
Jonathan Nieder Oct. 12, 2020, 3:19 p.m. UTC | #9
Hi,

Johannes Schindelin wrote:

> The `merge_bases_many()` function has only 33 lines of code, partially
> duplicating `get_reachable_subset()`. Yet, it had a bug in it for two
> years that was not found.
>
> How much worse will the situation be with your 431 lines of code.
>
> Even more so when you consider the fact that you intend to shove the same
> duplication down libgit2's throat. It's "triplicating" code.

Careful: you seem to be making a bunch of assumptions here (for
example, around Han-Wen having some intent around shoving things down
libgit2's throat), and you seem to be focusing on the person instead
of the contribution.

Would you mind restating your point in a way that is a little easier
to process, for example by focusing on the effect that this patch
would have on you as a Git developer?

Thanks,
Jonathan

> So I find the argument you made above quite unconvincing.
Jonathan Nieder Oct. 12, 2020, 3:25 p.m. UTC | #10
(+cc: Patrick Steinhardt from libgit2)
Hi,

Han-Wen Nienhuys wrote[1]:
> On Fri, Oct 2, 2020 at 6:12 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
>> Han-Wen Nienhuys wrote:

>>> +     reftable_free(a);
>>> +}
>>
>> Are there other callers that need custom free?
>
> The libgit2 folks requested the ability to set memory allocation
> routines, hence reftable_free().

Thanks.  Patrick or Han-Wen, can you say a little more about this use
case?  That would help with making sure we are making an API that
meets its needs.

For example, is a custom allocator something that would be set
globally or something attached to a handle?  If the former, would code
that uses xmalloc and free and gets #define-d away when used in
libgit2 work?  If the latter, what does the handle look like?

Thanks,
Jonathan

[1] context: https://lore.kernel.org/git/pull.847.git.git.1600283416.gitgitgadget@gmail.com/T/#r2337ddc80848c80f6c9699d904ab64d4297d2c54
Junio C Hamano Oct. 12, 2020, 4:42 p.m. UTC | #11
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> FWIW, the duplication is really tiny: according to
>>
>>  $ wc $(grep -l REFTABLE_STANDALONE *[ch])
>>
>> it's just 431 lines of code.
>
> The `merge_bases_many()` function has only 33 lines of code, partially
> duplicating `get_reachable_subset()`. Yet, it had a bug in it for two
> years that was not found.

It does not affect the current discussion, but what you are giving
is a revisionist view of the world.  The latter function came MUCH
later to do a bit more than the former.  The bug was caused by the
fact that those that added the latter neglected the responsibility
of maintaining the former to the same degree when new feature like
commit-graph were added to the latter.

The root cause was that the latter one did not share the code with
the former one when it was introduced.  That does make it appear
similar to the situation we have at hand with duplicated utility
functions.

> How much worse will the situation be with your 431 lines of code.
>
> Even more so when you consider the fact that you intend to shove the same
> duplication down libgit2's throat. It's "triplicating" code.
>
> So I find the argument you made above quite unconvincing.
>
> Ciao,
> Dscho
Patrick Steinhardt Oct. 12, 2020, 5:05 p.m. UTC | #12
On Mon, Oct 12, 2020 at 08:25:05AM -0700, Jonathan Nieder wrote:
> (+cc: Patrick Steinhardt from libgit2)
> Hi,
> 
> Han-Wen Nienhuys wrote[1]:
> > On Fri, Oct 2, 2020 at 6:12 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
> >> Han-Wen Nienhuys wrote:
> 
> >>> +     reftable_free(a);
> >>> +}
> >>
> >> Are there other callers that need custom free?
> >
> > The libgit2 folks requested the ability to set memory allocation
> > routines, hence reftable_free().
> 
> Thanks.  Patrick or Han-Wen, can you say a little more about this use
> case?  That would help with making sure we are making an API that
> meets its needs.
> 
> For example, is a custom allocator something that would be set
> globally or something attached to a handle?  If the former, would code
> that uses xmalloc and free and gets #define-d away when used in
> libgit2 work?  If the latter, what does the handle look like?

We have global pluggable allocators in libgit2 which can be set up
before calling `git_libgit2_init()`. The user of libgit2 can fill a
`git_allocator` structure with a set of funtcion pointers, most
importantly with implementations of `free` and `malloc`. Those then get
used across all of libgit2 for all subsequent allocations.

In order to be as thorough as possible, we thus also need to replace
these function pointers for libgit2's dependencies. As registration of
the allocator happens at runtime, we need to also be able to replace
function pointers of dependencies at runtime. E.g. for OpenSSL, it
provides an API `CRYPTO_set_mem_functions(malloc, realloc, free)` which
we call on global initialization of the libgit2 library.

So, to answer your questions:

    - The allocator is global and cannot be changed after initialization
      of libgit2.

    - It is pluggable, users can set up their own allocators by filling
      a structure with function pointers for `free`, `malloc`, `realloc`
      etc.

    - Due to the pluggable nature, we need to be able to set up those
      pointers at runtime. We can provide a set of static wrappers
      though which then call into the pluggable functions, so defines
      would probably work for us, too.

I hope that answers all of your questions.

Patrick
Jonathan Nieder Oct. 12, 2020, 5:45 p.m. UTC | #13
Patrick Steinhardt wrote:

> In order to be as thorough as possible, we thus also need to replace
> these function pointers for libgit2's dependencies. As registration of
> the allocator happens at runtime, we need to also be able to replace
> function pointers of dependencies at runtime. E.g. for OpenSSL, it
> provides an API `CRYPTO_set_mem_functions(malloc, realloc, free)` which
> we call on global initialization of the libgit2 library.
>
> So, to answer your questions:
>
>     - The allocator is global and cannot be changed after initialization
>       of libgit2.
>
>     - It is pluggable, users can set up their own allocators by filling
>       a structure with function pointers for `free`, `malloc`, `realloc`
>       etc.
>
>     - Due to the pluggable nature, we need to be able to set up those
>       pointers at runtime. We can provide a set of static wrappers
>       though which then call into the pluggable functions, so defines
>       would probably work for us, too.
>
> I hope that answers all of your questions.

Thanks!  Yes, that's very helpful.

Jonathan
Johannes Schindelin Oct. 12, 2020, 6:44 p.m. UTC | #14
Hi Jonathan,

On Mon, 12 Oct 2020, Jonathan Nieder wrote:

> Johannes Schindelin wrote:
>
> > The `merge_bases_many()` function has only 33 lines of code, partially
> > duplicating `get_reachable_subset()`. Yet, it had a bug in it for two
> > years that was not found.
> >
> > How much worse will the situation be with your 431 lines of code.
> >
> > Even more so when you consider the fact that you intend to shove the same
> > duplication down libgit2's throat. It's "triplicating" code.
>
> Careful: you seem to be making a bunch of assumptions here (for
> example, around Han-Wen having some intent around shoving things down
> libgit2's throat), and you seem to be focusing on the person instead
> of the contribution.
>
> Would you mind restating your point in a way that is a little easier
> to process, for example by focusing on the effect that this patch
> would have on you as a Git developer?

I really hope that Han-Wen did not take this as a personal attack.

In case he did, I'll gladly try to re-phrase my point: It does not matter
how much code is duplicated. The fact that we run into bugs even in
unintentionally duplicated code is reason enough to not lightly accept
this design decision as being set in stone.

Which is why I, you, Emily and Josh pointed that out, and it is the same
reason why Peff and Junio had made the same point in the past.

I was quite unpleasantly surprised to see that all of those objections
seemed to be insufficient, hence my rather forceful reply.

As to the libgit2 angle: libgit2 also has `strbuf` (which it calls
`git_buf` IIRC), and I am certain that it also has those other helper
functions à la `put_be32()`. So yes, the triplication I mentioned is a
very real, undesirable feature of this patch series.

To be honest, I would much rather spend my time reviewing patches that
added `reftable` support using as much of libgit.a's functionality as
possible rather than repeating the same point over and over again: that it
makes very little sense _not_ to use that functionality. Of course, at
this stage I understand that my feedback is not very welcome, so I will
try to refrain from commenting on this patch series further (I do reserve
the right to send patches that fix CI failures, just like I've done quite
a few times up to this point).

Ciao,
Dscho
Johannes Schindelin Oct. 12, 2020, 7:01 p.m. UTC | #15
Hi Junio,

On Mon, 12 Oct 2020, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> >> FWIW, the duplication is really tiny: according to
> >>
> >>  $ wc $(grep -l REFTABLE_STANDALONE *[ch])
> >>
> >> it's just 431 lines of code.
> >
> > The `merge_bases_many()` function has only 33 lines of code, partially
> > duplicating `get_reachable_subset()`. Yet, it had a bug in it for two
> > years that was not found.
>
> It does not affect the current discussion, but what you are giving
> is a revisionist view of the world.  The latter function came MUCH
> later to do a bit more than the former.  The bug was caused by the
> fact that those that added the latter neglected the responsibility
> of maintaining the former to the same degree when new feature like
> commit-graph were added to the latter.

You're right, I mixed up the direction: by introducing new code, the old
code became under-tested and a bug creeped in that was not detected for
two years.

> The root cause was that the latter one did not share the code with
> the former one when it was introduced.  That does make it appear
> similar to the situation we have at hand with duplicated utility
> functions.

Indeed, and even if this is just one concrete example, experience (and
teachers and instructors and mentors) repeat the lesson that code
duplication should be avoided because it _does_ cause problems.

Thanks for setting the record straight,
Dscho

>
> > How much worse will the situation be with your 431 lines of code.
> >
> > Even more so when you consider the fact that you intend to shove the same
> > duplication down libgit2's throat. It's "triplicating" code.
> >
> > So I find the argument you made above quite unconvincing.
> >
> > Ciao,
> > Dscho
>
Jonathan Nieder Oct. 12, 2020, 7:41 p.m. UTC | #16
Johannes Schindelin wrote:

>                                                          Of course, at
> this stage I understand that my feedback is not very welcome,

Do you mean because you don't feel that your feedback has been
sufficiently acted upon, or is there something else you are alluding
to?

Puzzled,
Jonathan
Johannes Schindelin Oct. 12, 2020, 8:27 p.m. UTC | #17
Hi Jonathan,

On Mon, 12 Oct 2020, Jonathan Nieder wrote:

> Johannes Schindelin wrote:
>
> > Of course, at this stage I understand that my feedback is not very
> > welcome,
>
> Do you mean because you don't feel that your feedback has been
> sufficiently acted upon, or is there something else you are alluding to?

My feedback has not been acted upon, and my arguments have been dismissed
(or there were attempts to dismiss them, I should say, that did not
convince me). At least I saw that when gitster repeated some of my
concerns in #git-devel, there were attempts to address them. While I feel
bad about leaving the burden on gitster's shoulders, at least I am glad
that those concerns are being addressed.

Ciao,
Dscho
Johannes Schindelin Oct. 13, 2020, 12:12 p.m. UTC | #18
Hi Patrick,

On Mon, 12 Oct 2020, Patrick Steinhardt wrote:

> On Mon, Oct 12, 2020 at 08:25:05AM -0700, Jonathan Nieder wrote:
> > (+cc: Patrick Steinhardt from libgit2)
> > Hi,
> >
> > Han-Wen Nienhuys wrote[1]:
> > > On Fri, Oct 2, 2020 at 6:12 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
> > >> Han-Wen Nienhuys wrote:
> >
> > >>> +     reftable_free(a);
> > >>> +}
> > >>
> > >> Are there other callers that need custom free?
> > >
> > > The libgit2 folks requested the ability to set memory allocation
> > > routines, hence reftable_free().
> >
> > Thanks.  Patrick or Han-Wen, can you say a little more about this use
> > case?  That would help with making sure we are making an API that
> > meets its needs.
> >
> > For example, is a custom allocator something that would be set
> > globally or something attached to a handle?  If the former, would code
> > that uses xmalloc and free and gets #define-d away when used in
> > libgit2 work?  If the latter, what does the handle look like?
>
> We have global pluggable allocators in libgit2 which can be set up
> before calling `git_libgit2_init()`. The user of libgit2 can fill a
> `git_allocator` structure with a set of funtcion pointers, most
> importantly with implementations of `free` and `malloc`. Those then get
> used across all of libgit2 for all subsequent allocations.

I did not find out how those are used in the `deps/` part of libgit2's
source code. For example, I see a couple of instances where `malloc()` is
used in `ntlmclient` and in `pcre`.

The thing I was looking for would have been something like

	#define malloc(size) git__malloc(size)
	...

This would also have been what I imagined to be the best strategy to
integrate the reftable code once it is properly embedded in libgit.a (and
of course using libgit.a's API as much as it can).

Somewhat related: I was wondering whether it would make sense for git.git
to rename `strbuf` to `git_buf`? Would that make it easier to exchange
code between the two projects? Or would it just be unnecessary churn?

Ciao,
Dscho
Junio C Hamano Oct. 13, 2020, 3:47 p.m. UTC | #19
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Somewhat related: I was wondering whether it would make sense for git.git
> to rename `strbuf` to `git_buf`? Would that make it easier to exchange
> code between the two projects? Or would it just be unnecessary churn?

To us, "git_buf" is just as descriptive as "buf" and does not say
anything about the nature of 'buf' (other than apparently it was
invented and widely used here).  "git_strbuf" I can understand, but
why should we?
Johannes Schindelin Oct. 15, 2020, 11:46 a.m. UTC | #20
Hi Junio,

On Tue, 13 Oct 2020, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > Somewhat related: I was wondering whether it would make sense for git.git
> > to rename `strbuf` to `git_buf`? Would that make it easier to exchange
> > code between the two projects? Or would it just be unnecessary churn?
>
> To us, "git_buf" is just as descriptive as "buf" and does not say
> anything about the nature of 'buf' (other than apparently it was
> invented and widely used here).  "git_strbuf" I can understand, but
> why should we?

If it makes code sharing between git.git and libgit2 easier, why shouldn't
we ;-)

Obviously, if it doesn't make life easier, we shouldn't bother.

Ciao,
Dscho
Junio C Hamano Oct. 15, 2020, 4:23 p.m. UTC | #21
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Hi Junio,
>
> On Tue, 13 Oct 2020, Junio C Hamano wrote:
>
>> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>>
>> > Somewhat related: I was wondering whether it would make sense for git.git
>> > to rename `strbuf` to `git_buf`? Would that make it easier to exchange
>> > code between the two projects? Or would it just be unnecessary churn?
>>
>> To us, "git_buf" is just as descriptive as "buf" and does not say
>> anything about the nature of 'buf' (other than apparently it was
>> invented and widely used here).  "git_strbuf" I can understand, but
>> why should we?
>
> If it makes code sharing between git.git and libgit2 easier, why shouldn't
> we ;-)

I see no reasonably explanation why libgit2 is not the one who uses
"#define strbuf git_buf" to make "sharing easier", though.
Johannes Schindelin Oct. 15, 2020, 7:39 p.m. UTC | #22
Hi Junio,

On Thu, 15 Oct 2020, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > Hi Junio,
> >
> > On Tue, 13 Oct 2020, Junio C Hamano wrote:
> >
> >> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> >>
> >> > Somewhat related: I was wondering whether it would make sense for git.git
> >> > to rename `strbuf` to `git_buf`? Would that make it easier to exchange
> >> > code between the two projects? Or would it just be unnecessary churn?
> >>
> >> To us, "git_buf" is just as descriptive as "buf" and does not say
> >> anything about the nature of 'buf' (other than apparently it was
> >> invented and widely used here).  "git_strbuf" I can understand, but
> >> why should we?
> >
> > If it makes code sharing between git.git and libgit2 easier, why shouldn't
> > we ;-)
>
> I see no reasonably explanation why libgit2 is not the one who uses
> "#define strbuf git_buf" to make "sharing easier", though.

That's a fair point, thanks for bringing it up!

Ciao,
Dscho
Patrick Steinhardt Oct. 16, 2020, 9:15 a.m. UTC | #23
On Thu, Oct 15, 2020 at 09:23:28AM -0700, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > Hi Junio,
> >
> > On Tue, 13 Oct 2020, Junio C Hamano wrote:
> >
> >> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> >>
> >> > Somewhat related: I was wondering whether it would make sense for git.git
> >> > to rename `strbuf` to `git_buf`? Would that make it easier to exchange
> >> > code between the two projects? Or would it just be unnecessary churn?
> >>
> >> To us, "git_buf" is just as descriptive as "buf" and does not say
> >> anything about the nature of 'buf' (other than apparently it was
> >> invented and widely used here).  "git_strbuf" I can understand, but
> >> why should we?
> >
> > If it makes code sharing between git.git and libgit2 easier, why shouldn't
> > we ;-)
> 
> I see no reasonably explanation why libgit2 is not the one who uses
> "#define strbuf git_buf" to make "sharing easier", though.

It probably wouldn't help much anyway. We already have our own buffer
type which we can't easily replace with yours as it's part of the public
interface. If the need arises, providing a compatibility interface for
it shouldn't be too hard.

Patrick
Ævar Arnfjörð Bjarmason Oct. 23, 2020, 9:13 a.m. UTC | #24
On Sat, Oct 10 2020, Han-Wen Nienhuys wrote:

> On Thu, Oct 8, 2020 at 3:48 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>>
>> > From: Han-Wen Nienhuys <hanwen@google.com>
>> >
>> > This commit provides basic utility classes for the reftable library.
>> >
>> > Since the reftable library must compile standalone, there may be some overlap
>> > with git-core utility functions.
>
>> I think duplicating things like strbuf is an unnecessary burden if Git
>> is to maintain this library. Something like "reftable will only import
>> git-compat-util.h and strbuf.h, and any project that wants to use
>> reftable must make sure that these functions and data structures are
>> available" would be more plausible.
>
> Sure, but how do we ensure that the directory won't take on
> dependencies beyond these headers? I am worried that I will be
> involved in a tedious back & forth process to keep updates going into
> libgit2 and/or also have to keep maintaining
> github.com/google/reftable.
>
> FWIW, the duplication is really tiny: according to
>
>  $ wc $(grep -l REFTABLE_STANDALONE *[ch])
>
> it's just 431 lines of code.

Why import the dead code at all? If I add this to your update script:

    # Not a general solution, but works for the specific code here.
    perl -0777 -pi -e 's[(#ifndef REFTABLE_STANDALONE\n.*?\n#else\n).*?(?=\n#endif)][$1/* Removed upstream code for git.git import */]gs' reftable/system.h
    perl -0777 -pi -e 's[(?<=#ifdef REFTABLE_STANDALONE\n).*?(?=\n#endif)][/* Removed upstream code for git.git import */]gs' reftable/strbuf.c reftable/compat.h
    perl -0777 -pi -e 's[(?<=#ifdef REFTABLE_STANDALONE\n).*?(?=\n#else)][/* Removed upstream code for git.git import */]gs' reftable/compat.c reftable/strbuf.h

It's now 157 lines instead of 431.

I think doing that with a tiny bit more complexity in the update.sh
script is a much lower maintenance burden.

It's not just about the number of lines, but things coming up in grep,
and now unique code really stands out (e.g. strbuf_add_void, should
probably be just added to the main strbuf.h if it's needed...), and of
course the cost of attention of eyeballing an already big series on the
ML & the churn every time we have updates.

Overall I'm all for having this carved out in its own directory at a
cost of a bit more maintenance burden if it can be shared with libgit2 &
be a standalone library.

I am concerned that it seems this code can't be maintained in git.git by
anyone not willing to sign a contract with Google. I sent a tiny PR for
a typo fix at [1] and got directed to sign [2] before someone at Google
could look at it. I see brian raised this before in [3] but there wasn't
a reply to that point.

Is there some summary of how this part of integrating it is supposed to
work going forward?

At first glance that seems like a recipe for perma-forking this pretty
much from day one, i.e.:

 A. Upstream changes happen
 B. We get a patch to the bundled library to this ML
 ==> Google employees maintaining A can't even look at B
 ==> Patch B can't be integrated into A

1. https://github.com/google/reftable/pull/2
2. https://cla.developers.google.com/about/google-individual
3. https://lore.kernel.org/git/20200512233741.GB6605@camp.crustytoothpaste.net/
Junio C Hamano Oct. 23, 2020, 5:36 p.m. UTC | #25
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> It's not just about the number of lines, but things coming up in grep,
> and now unique code really stands out (e.g. strbuf_add_void, should
> probably be just added to the main strbuf.h if it's needed...), and of
> course the cost of attention of eyeballing an already big series on the
> ML & the churn every time we have updates.

These are all valid concerns.

> Overall I'm all for having this carved out in its own directory at a
> cost of a bit more maintenance burden if it can be shared with libgit2 &
> be a standalone library.

Do you mean by "this" the reftable stuff as a whole, or only the
duplicated support library part?  If the latter is split out of what
we import as our reftable/ directory, and stored in a separate
directory that we do not have to import (because we have strbuf and
other stuff) but other people may choose to use (because they may
not have strbuf and other stuff), that may work.  Also, if the
contents of reftable/ directory wants a pluggable allocator, the
main code can be written to call reftable_malloc (or whatever), with
a thin shim to interface with us (i.e. reftable_malloc() would be
implemented as a thin wrapper around xmalloc() for us) which is
stored in a separate directory just for us to interface with the
main reftable library.  For libgit2, there will be a separate
directory that uses a different implementation of reftable_malloc()
that would let them plug their preferred allocator.  An arrangement
like that might work.

I do not offhand know if that kind of overhead is worth the trouble
or if there are better ways, though.

> I am concerned that it seems this code can't be maintained in git.git by
> anyone not willing to sign a contract with Google.

It can be maintained in git.git; the trouble comes when they want to
update us.

I however suspect that, as the primary intended audience, it is hard
to imagine that the reftable library as a standalone project will be
successful without going through the usual reviews and testing in
git.git.  So even though there exists github.com/google/reftable
repository, its contents may not matter very much in practice,
unless they come here and beg for the change we make ourselves
anyway.  Perhaps I am being naive.  I dunno.

> I sent a tiny PR for
> a typo fix at [1] and got directed to sign [2] before someone at Google
> could look at it. I see brian raised this before in [3] but there wasn't
> a reply to that point.
>
> Is there some summary of how this part of integrating it is supposed to
> work going forward?

Yeah, thanks for raising a good point.  We definitely need to figure
this part out.

> At first glance that seems like a recipe for perma-forking this pretty
> much from day one, i.e.:
>
>  A. Upstream changes happen
>  B. We get a patch to the bundled library to this ML
>  ==> Google employees maintaining A can't even look at B
>  ==> Patch B can't be integrated into A
>
> 1. https://github.com/google/reftable/pull/2
> 2. https://cla.developers.google.com/about/google-individual
> 3. https://lore.kernel.org/git/20200512233741.GB6605@camp.crustytoothpaste.net/
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index de53954590..e40d55cd87 100644
--- a/Makefile
+++ b/Makefile
@@ -727,6 +727,7 @@  TEST_BUILTINS_OBJS += test-read-cache.o
 TEST_BUILTINS_OBJS += test-read-graph.o
 TEST_BUILTINS_OBJS += test-read-midx.o
 TEST_BUILTINS_OBJS += test-ref-store.o
+TEST_BUILTINS_OBJS += test-reftable.o
 TEST_BUILTINS_OBJS += test-regex.o
 TEST_BUILTINS_OBJS += test-repository.o
 TEST_BUILTINS_OBJS += test-revision-walking.o
@@ -806,6 +807,8 @@  TEST_SHELL_PATH = $(SHELL_PATH)
 
 LIB_FILE = libgit.a
 XDIFF_LIB = xdiff/lib.a
+REFTABLE_LIB = reftable/libreftable.a
+REFTABLE_TEST_LIB = reftable/libreftable_test.a
 
 GENERATED_H += config-list.h
 GENERATED_H += command-list.h
@@ -1167,7 +1170,7 @@  THIRD_PARTY_SOURCES += compat/regex/%
 THIRD_PARTY_SOURCES += sha1collisiondetection/%
 THIRD_PARTY_SOURCES += sha1dc/%
 
-GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB)
+GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB)
 EXTLIBS =
 
 GIT_USER_AGENT = git/$(GIT_VERSION)
@@ -2369,10 +2372,21 @@  XDIFF_OBJS += xdiff/xpatience.o
 XDIFF_OBJS += xdiff/xprepare.o
 XDIFF_OBJS += xdiff/xutils.o
 
+REFTABLE_OBJS += reftable/basics.o
+REFTABLE_OBJS += reftable/blocksource.o
+REFTABLE_OBJS += reftable/publicbasics.o
+REFTABLE_OBJS += reftable/compat.o
+REFTABLE_OBJS += reftable/strbuf.o
+
+REFTABLE_TEST_OBJS += reftable/strbuf_test.o
+REFTABLE_TEST_OBJS += reftable/test_framework.o
+
 TEST_OBJS := $(patsubst %$X,%.o,$(TEST_PROGRAMS)) $(patsubst %,t/helper/%,$(TEST_BUILTINS_OBJS))
 OBJECTS := $(LIB_OBJS) $(BUILTIN_OBJS) $(PROGRAM_OBJS) $(TEST_OBJS) \
 	$(XDIFF_OBJS) \
 	$(FUZZ_OBJS) \
+	$(REFTABLE_OBJS) \
+	$(REFTABLE_TEST_OBJS) \
 	common-main.o \
 	git.o
 ifndef NO_CURL
@@ -2524,6 +2538,12 @@  $(LIB_FILE): $(LIB_OBJS)
 $(XDIFF_LIB): $(XDIFF_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
 
+$(REFTABLE_LIB): $(REFTABLE_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
+
+$(REFTABLE_TEST_LIB): $(REFTABLE_TEST_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^
+
 export DEFAULT_EDITOR DEFAULT_PAGER
 
 Documentation/GIT-EXCLUDED-PROGRAMS: FORCE
@@ -2802,7 +2822,7 @@  perf: all
 
 t/helper/test-tool$X: $(patsubst %,t/helper/%,$(TEST_BUILTINS_OBJS))
 
-t/helper/test-%$X: t/helper/test-%.o GIT-LDFLAGS $(GITLIBS)
+t/helper/test-%$X: t/helper/test-%.o GIT-LDFLAGS $(GITLIBS) $(REFTABLE_TEST_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
 
 check-sha1:: t/helper/test-tool$X
@@ -3133,7 +3153,7 @@  cocciclean:
 clean: profile-clean coverage-clean cocciclean
 	$(RM) *.res
 	$(RM) $(OBJECTS)
-	$(RM) $(LIB_FILE) $(XDIFF_LIB)
+	$(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB)
 	$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) $(FUZZ_PROGRAMS)
diff --git a/reftable/basics.c b/reftable/basics.c
new file mode 100644
index 0000000000..c429055d15
--- /dev/null
+++ b/reftable/basics.c
@@ -0,0 +1,131 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "basics.h"
+
+void put_be24(uint8_t *out, uint32_t i)
+{
+	out[0] = (uint8_t)((i >> 16) & 0xff);
+	out[1] = (uint8_t)((i >> 8) & 0xff);
+	out[2] = (uint8_t)(i & 0xff);
+}
+
+uint32_t get_be24(uint8_t *in)
+{
+	return (uint32_t)(in[0]) << 16 | (uint32_t)(in[1]) << 8 |
+	       (uint32_t)(in[2]);
+}
+
+void put_be16(uint8_t *out, uint16_t i)
+{
+	out[0] = (uint8_t)((i >> 8) & 0xff);
+	out[1] = (uint8_t)(i & 0xff);
+}
+
+int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
+{
+	size_t lo = 0;
+	size_t hi = sz;
+
+	/* invariant: (hi == sz) || f(hi) == true
+	   (lo == 0 && f(0) == true) || fi(lo) == false
+	 */
+	while (hi - lo > 1) {
+		size_t mid = lo + (hi - lo) / 2;
+
+		int val = f(mid, args);
+		if (val) {
+			hi = mid;
+		} else {
+			lo = mid;
+		}
+	}
+
+	if (lo == 0) {
+		if (f(0, args)) {
+			return 0;
+		} else {
+			return 1;
+		}
+	}
+
+	return hi;
+}
+
+void free_names(char **a)
+{
+	char **p = a;
+	if (p == NULL) {
+		return;
+	}
+	while (*p) {
+		reftable_free(*p);
+		p++;
+	}
+	reftable_free(a);
+}
+
+int names_length(char **names)
+{
+	int len = 0;
+	char **p = names;
+	while (*p) {
+		p++;
+		len++;
+	}
+	return len;
+}
+
+void parse_names(char *buf, int size, char ***namesp)
+{
+	char **names = NULL;
+	size_t names_cap = 0;
+	size_t names_len = 0;
+
+	char *p = buf;
+	char *end = buf + size;
+	while (p < end) {
+		char *next = strchr(p, '\n');
+		if (next != NULL) {
+			*next = 0;
+		} else {
+			next = end;
+		}
+		if (p < next) {
+			if (names_len == names_cap) {
+				names_cap = 2 * names_cap + 1;
+				names = reftable_realloc(
+					names, names_cap * sizeof(char *));
+			}
+			names[names_len++] = xstrdup(p);
+		}
+		p = next + 1;
+	}
+
+	if (names_len == names_cap) {
+		names_cap = 2 * names_cap + 1;
+		names = reftable_realloc(names, names_cap * sizeof(char *));
+	}
+
+	names[names_len] = NULL;
+	*namesp = names;
+}
+
+int names_equal(char **a, char **b)
+{
+	while (*a && *b) {
+		if (strcmp(*a, *b)) {
+			return 0;
+		}
+
+		a++;
+		b++;
+	}
+
+	return *a == *b;
+}
diff --git a/reftable/basics.h b/reftable/basics.h
new file mode 100644
index 0000000000..90639865a7
--- /dev/null
+++ b/reftable/basics.h
@@ -0,0 +1,48 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef BASICS_H
+#define BASICS_H
+
+#include "system.h"
+
+/* Bigendian en/decoding of integers */
+
+void put_be24(uint8_t *out, uint32_t i);
+uint32_t get_be24(uint8_t *in);
+void put_be16(uint8_t *out, uint16_t i);
+
+/*
+  find smallest index i in [0, sz) at which f(i) is true, assuming
+  that f is ascending. Return sz if f(i) is false for all indices.
+*/
+int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args);
+
+/*
+  Frees a NULL terminated array of malloced strings. The array itself is also
+  freed.
+ */
+void free_names(char **a);
+
+/* parse a newline separated list of names. Empty names are discarded. */
+void parse_names(char *buf, int size, char ***namesp);
+
+/* compares two NULL-terminated arrays of strings. */
+int names_equal(char **a, char **b);
+
+/* returns the array size of a NULL-terminated array of strings. */
+int names_length(char **names);
+
+/* Allocation routines; they invoke the functions set through
+ * reftable_set_alloc() */
+void *reftable_malloc(size_t sz);
+void *reftable_realloc(void *p, size_t sz);
+void reftable_free(void *p);
+void *reftable_calloc(size_t sz);
+
+#endif
diff --git a/reftable/blocksource.c b/reftable/blocksource.c
new file mode 100644
index 0000000000..7f29b864f9
--- /dev/null
+++ b/reftable/blocksource.c
@@ -0,0 +1,148 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "system.h"
+
+#include "basics.h"
+#include "blocksource.h"
+#include "strbuf.h"
+#include "reftable.h"
+
+static void strbuf_return_block(void *b, struct reftable_block *dest)
+{
+	memset(dest->data, 0xff, dest->len);
+	reftable_free(dest->data);
+}
+
+static void strbuf_close(void *b)
+{
+}
+
+static int strbuf_read_block(void *v, struct reftable_block *dest, uint64_t off,
+			     uint32_t size)
+{
+	struct strbuf *b = (struct strbuf *)v;
+	assert(off + size <= b->len);
+	dest->data = reftable_calloc(size);
+	memcpy(dest->data, b->buf + off, size);
+	dest->len = size;
+	return size;
+}
+
+static uint64_t strbuf_size(void *b)
+{
+	return ((struct strbuf *)b)->len;
+}
+
+static struct reftable_block_source_vtable strbuf_vtable = {
+	.size = &strbuf_size,
+	.read_block = &strbuf_read_block,
+	.return_block = &strbuf_return_block,
+	.close = &strbuf_close,
+};
+
+void block_source_from_strbuf(struct reftable_block_source *bs,
+			      struct strbuf *buf)
+{
+	assert(bs->ops == NULL);
+	bs->ops = &strbuf_vtable;
+	bs->arg = buf;
+}
+
+static void malloc_return_block(void *b, struct reftable_block *dest)
+{
+	memset(dest->data, 0xff, dest->len);
+	reftable_free(dest->data);
+}
+
+static struct reftable_block_source_vtable malloc_vtable = {
+	.return_block = &malloc_return_block,
+};
+
+static struct reftable_block_source malloc_block_source_instance = {
+	.ops = &malloc_vtable,
+};
+
+struct reftable_block_source malloc_block_source(void)
+{
+	return malloc_block_source_instance;
+}
+
+struct file_block_source {
+	int fd;
+	uint64_t size;
+};
+
+static uint64_t file_size(void *b)
+{
+	return ((struct file_block_source *)b)->size;
+}
+
+static void file_return_block(void *b, struct reftable_block *dest)
+{
+	memset(dest->data, 0xff, dest->len);
+	reftable_free(dest->data);
+}
+
+static void file_close(void *b)
+{
+	int fd = ((struct file_block_source *)b)->fd;
+	if (fd > 0) {
+		close(fd);
+		((struct file_block_source *)b)->fd = 0;
+	}
+
+	reftable_free(b);
+}
+
+static int file_read_block(void *v, struct reftable_block *dest, uint64_t off,
+			   uint32_t size)
+{
+	struct file_block_source *b = (struct file_block_source *)v;
+	assert(off + size <= b->size);
+	dest->data = reftable_malloc(size);
+	if (pread(b->fd, dest->data, size, off) != size)
+		return -1;
+	dest->len = size;
+	return size;
+}
+
+static struct reftable_block_source_vtable file_vtable = {
+	.size = &file_size,
+	.read_block = &file_read_block,
+	.return_block = &file_return_block,
+	.close = &file_close,
+};
+
+int reftable_block_source_from_file(struct reftable_block_source *bs,
+				    const char *name)
+{
+	struct stat st = { 0 };
+	int err = 0;
+	int fd = open(name, O_RDONLY);
+	struct file_block_source *p = NULL;
+	if (fd < 0) {
+		if (errno == ENOENT) {
+			return REFTABLE_NOT_EXIST_ERROR;
+		}
+		return -1;
+	}
+
+	err = fstat(fd, &st);
+	if (err < 0)
+		return -1;
+
+	p = reftable_calloc(sizeof(struct file_block_source));
+	p->size = st.st_size;
+	p->fd = fd;
+
+	assert(bs->ops == NULL);
+	bs->ops = &file_vtable;
+	bs->arg = p;
+	return 0;
+}
diff --git a/reftable/blocksource.h b/reftable/blocksource.h
new file mode 100644
index 0000000000..3faf83fa9d
--- /dev/null
+++ b/reftable/blocksource.h
@@ -0,0 +1,22 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef BLOCKSOURCE_H
+#define BLOCKSOURCE_H
+
+#include "strbuf.h"
+
+struct reftable_block_source;
+
+/* Create an in-memory block source for reading reftables */
+void block_source_from_strbuf(struct reftable_block_source *bs,
+			      struct strbuf *buf);
+
+struct reftable_block_source malloc_block_source(void);
+
+#endif
diff --git a/reftable/compat.c b/reftable/compat.c
new file mode 100644
index 0000000000..a48c5aa5e3
--- /dev/null
+++ b/reftable/compat.c
@@ -0,0 +1,110 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+
+*/
+
+/* compat.c - compatibility functions for standalone compilation */
+
+#include "system.h"
+#include "basics.h"
+
+#ifdef REFTABLE_STANDALONE
+
+#include <dirent.h>
+
+void put_be32(void *p, uint32_t i)
+{
+	uint8_t *out = (uint8_t *)p;
+
+	out[0] = (uint8_t)((i >> 24) & 0xff);
+	out[1] = (uint8_t)((i >> 16) & 0xff);
+	out[2] = (uint8_t)((i >> 8) & 0xff);
+	out[3] = (uint8_t)((i)&0xff);
+}
+
+uint32_t get_be32(uint8_t *in)
+{
+	return (uint32_t)(in[0]) << 24 | (uint32_t)(in[1]) << 16 |
+	       (uint32_t)(in[2]) << 8 | (uint32_t)(in[3]);
+}
+
+void put_be64(void *p, uint64_t v)
+{
+	uint8_t *out = (uint8_t *)p;
+	int i = sizeof(uint64_t);
+	while (i--) {
+		out[i] = (uint8_t)(v & 0xff);
+		v >>= 8;
+	}
+}
+
+uint64_t get_be64(void *out)
+{
+	uint8_t *bytes = (uint8_t *)out;
+	uint64_t v = 0;
+	int i = 0;
+	for (i = 0; i < sizeof(uint64_t); i++) {
+		v = (v << 8) | (uint8_t)(bytes[i] & 0xff);
+	}
+	return v;
+}
+
+uint16_t get_be16(uint8_t *in)
+{
+	return (uint32_t)(in[0]) << 8 | (uint32_t)(in[1]);
+}
+
+char *xstrdup(const char *s)
+{
+	int l = strlen(s);
+	char *dest = (char *)reftable_malloc(l + 1);
+	strncpy(dest, s, l + 1);
+	return dest;
+}
+
+void sleep_millisec(int millisecs)
+{
+	usleep(millisecs * 1000);
+}
+
+void reftable_clear_dir(const char *dirname)
+{
+	DIR *dir = opendir(dirname);
+	struct dirent *ent = NULL;
+	assert(dir);
+	while ((ent = readdir(dir)) != NULL) {
+		unlinkat(dirfd(dir), ent->d_name, 0);
+	}
+	closedir(dir);
+	rmdir(dirname);
+}
+
+#else
+
+#include "../dir.h"
+
+void reftable_clear_dir(const char *dirname)
+{
+	struct strbuf path = STRBUF_INIT;
+	strbuf_addstr(&path, dirname);
+	remove_dir_recursively(&path, 0);
+	strbuf_release(&path);
+}
+
+#endif
+
+int hash_size(uint32_t id)
+{
+	switch (id) {
+	case 0:
+	case SHA1_ID:
+		return SHA1_SIZE;
+	case SHA256_ID:
+		return SHA256_SIZE;
+	}
+	abort();
+}
diff --git a/reftable/compat.h b/reftable/compat.h
new file mode 100644
index 0000000000..a765c57e96
--- /dev/null
+++ b/reftable/compat.h
@@ -0,0 +1,48 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef COMPAT_H
+#define COMPAT_H
+
+#include "system.h"
+
+#ifdef REFTABLE_STANDALONE
+
+/* functions that git-core provides, for standalone compilation */
+#include <stdint.h>
+
+uint64_t get_be64(void *in);
+void put_be64(void *out, uint64_t i);
+
+void put_be32(void *out, uint32_t i);
+uint32_t get_be32(uint8_t *in);
+
+uint16_t get_be16(uint8_t *in);
+
+#define ARRAY_SIZE(a) sizeof((a)) / sizeof((a)[0])
+#define FREE_AND_NULL(x)          \
+	do {                      \
+		reftable_free(x); \
+		(x) = NULL;       \
+	} while (0)
+#define QSORT(arr, n, cmp) qsort(arr, n, sizeof(arr[0]), cmp)
+#define SWAP(a, b)                              \
+	{                                       \
+		char tmp[sizeof(a)];            \
+		assert(sizeof(a) == sizeof(b)); \
+		memcpy(&tmp[0], &a, sizeof(a)); \
+		memcpy(&a, &b, sizeof(a));      \
+		memcpy(&b, &tmp[0], sizeof(a)); \
+	}
+
+char *xstrdup(const char *s);
+
+void sleep_millisec(int millisecs);
+
+#endif
+#endif
diff --git a/reftable/publicbasics.c b/reftable/publicbasics.c
new file mode 100644
index 0000000000..12d547d70d
--- /dev/null
+++ b/reftable/publicbasics.c
@@ -0,0 +1,100 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "reftable.h"
+
+#include "basics.h"
+#include "system.h"
+
+const char *reftable_error_str(int err)
+{
+	static char buf[250];
+	switch (err) {
+	case REFTABLE_IO_ERROR:
+		return "I/O error";
+	case REFTABLE_FORMAT_ERROR:
+		return "corrupt reftable file";
+	case REFTABLE_NOT_EXIST_ERROR:
+		return "file does not exist";
+	case REFTABLE_LOCK_ERROR:
+		return "data is outdated";
+	case REFTABLE_API_ERROR:
+		return "misuse of the reftable API";
+	case REFTABLE_ZLIB_ERROR:
+		return "zlib failure";
+	case REFTABLE_NAME_CONFLICT:
+		return "file/directory conflict";
+	case REFTABLE_REFNAME_ERROR:
+		return "invalid refname";
+	case -1:
+		return "general error";
+	default:
+		snprintf(buf, sizeof(buf), "unknown error code %d", err);
+		return buf;
+	}
+}
+
+int reftable_error_to_errno(int err)
+{
+	switch (err) {
+	case REFTABLE_IO_ERROR:
+		return EIO;
+	case REFTABLE_FORMAT_ERROR:
+		return EFAULT;
+	case REFTABLE_NOT_EXIST_ERROR:
+		return ENOENT;
+	case REFTABLE_LOCK_ERROR:
+		return EBUSY;
+	case REFTABLE_API_ERROR:
+		return EINVAL;
+	case REFTABLE_ZLIB_ERROR:
+		return EDOM;
+	default:
+		return ERANGE;
+	}
+}
+
+static void *(*reftable_malloc_ptr)(size_t sz) = &malloc;
+static void *(*reftable_realloc_ptr)(void *, size_t) = &realloc;
+static void (*reftable_free_ptr)(void *) = &free;
+
+void *reftable_malloc(size_t sz)
+{
+	return (*reftable_malloc_ptr)(sz);
+}
+
+void *reftable_realloc(void *p, size_t sz)
+{
+	return (*reftable_realloc_ptr)(p, sz);
+}
+
+void reftable_free(void *p)
+{
+	reftable_free_ptr(p);
+}
+
+void *reftable_calloc(size_t sz)
+{
+	void *p = reftable_malloc(sz);
+	memset(p, 0, sz);
+	return p;
+}
+
+void reftable_set_alloc(void *(*malloc)(size_t),
+			void *(*realloc)(void *, size_t), void (*free)(void *))
+{
+	reftable_malloc_ptr = malloc;
+	reftable_realloc_ptr = realloc;
+	reftable_free_ptr = free;
+}
+
+int reftable_fd_write(void *arg, const void *data, size_t sz)
+{
+	int *fdp = (int *)arg;
+	return write(*fdp, data, sz);
+}
diff --git a/reftable/reftable-tests.h b/reftable/reftable-tests.h
new file mode 100644
index 0000000000..e38471888f
--- /dev/null
+++ b/reftable/reftable-tests.h
@@ -0,0 +1,22 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef REFTABLE_TESTS_H
+#define REFTABLE_TESTS_H
+
+int block_test_main(int argc, const char **argv);
+int merged_test_main(int argc, const char **argv);
+int record_test_main(int argc, const char **argv);
+int refname_test_main(int argc, const char **argv);
+int reftable_test_main(int argc, const char **argv);
+int strbuf_test_main(int argc, const char **argv);
+int stack_test_main(int argc, const char **argv);
+int tree_test_main(int argc, const char **argv);
+int reftable_dump_main(int argc, char *const *argv);
+
+#endif
diff --git a/reftable/strbuf.c b/reftable/strbuf.c
new file mode 100644
index 0000000000..136bf65591
--- /dev/null
+++ b/reftable/strbuf.c
@@ -0,0 +1,142 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "strbuf.h"
+
+#ifdef REFTABLE_STANDALONE
+
+void strbuf_init(struct strbuf *s, size_t alloc)
+{
+	struct strbuf empty = STRBUF_INIT;
+	*s = empty;
+}
+
+void strbuf_grow(struct strbuf *s, size_t extra)
+{
+	size_t newcap = s->len + extra + 1;
+	if (newcap > s->cap) {
+		s->buf = reftable_realloc(s->buf, newcap);
+		s->cap = newcap;
+	}
+}
+
+static void strbuf_resize(struct strbuf *s, int l)
+{
+	int zl = l + 1; /* one uint8_t for 0 termination. */
+	assert(s->canary == STRBUF_CANARY);
+	if (s->cap < zl) {
+		int c = s->cap * 2;
+		if (c < zl) {
+			c = zl;
+		}
+		s->cap = c;
+		s->buf = reftable_realloc(s->buf, s->cap);
+	}
+	s->len = l;
+	s->buf[l] = 0;
+}
+
+void strbuf_setlen(struct strbuf *s, size_t l)
+{
+	assert(s->cap >= l + 1);
+	s->len = l;
+	s->buf[l] = 0;
+}
+
+void strbuf_reset(struct strbuf *s)
+{
+	strbuf_resize(s, 0);
+}
+
+void strbuf_addstr(struct strbuf *d, const char *s)
+{
+	int l1 = d->len;
+	int l2 = strlen(s);
+	assert(d->canary == STRBUF_CANARY);
+
+	strbuf_resize(d, l2 + l1);
+	memcpy(d->buf + l1, s, l2);
+}
+
+void strbuf_addbuf(struct strbuf *s, struct strbuf *a)
+{
+	int end = s->len;
+	assert(s->canary == STRBUF_CANARY);
+	strbuf_resize(s, s->len + a->len);
+	memcpy(s->buf + end, a->buf, a->len);
+}
+
+char *strbuf_detach(struct strbuf *s, size_t *sz)
+{
+	char *p = NULL;
+	p = (char *)s->buf;
+	if (sz)
+		*sz = s->len;
+	s->buf = NULL;
+	s->cap = 0;
+	s->len = 0;
+	return p;
+}
+
+void strbuf_release(struct strbuf *s)
+{
+	assert(s->canary == STRBUF_CANARY);
+	s->cap = 0;
+	s->len = 0;
+	reftable_free(s->buf);
+	s->buf = NULL;
+}
+
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
+{
+	int min = a->len < b->len ? a->len : b->len;
+	int res = memcmp(a->buf, b->buf, min);
+	assert(a->canary == STRBUF_CANARY);
+	assert(b->canary == STRBUF_CANARY);
+	if (res != 0)
+		return res;
+	if (a->len < b->len)
+		return -1;
+	else if (a->len > b->len)
+		return 1;
+	else
+		return 0;
+}
+
+int strbuf_add(struct strbuf *b, const void *data, size_t sz)
+{
+	assert(b->canary == STRBUF_CANARY);
+	strbuf_grow(b, sz);
+	memcpy(b->buf + b->len, data, sz);
+	b->len += sz;
+	b->buf[b->len] = 0;
+	return sz;
+}
+
+#endif
+
+int strbuf_add_void(void *b, const void *data, size_t sz)
+{
+	strbuf_add((struct strbuf *)b, data, sz);
+	return sz;
+}
+
+int common_prefix_size(struct strbuf *a, struct strbuf *b)
+{
+	int p = 0;
+	while (p < a->len && p < b->len) {
+		if (a->buf[p] != b->buf[p]) {
+			break;
+		}
+		p++;
+	}
+
+	return p;
+}
+
+struct strbuf reftable_empty_strbuf = STRBUF_INIT;
diff --git a/reftable/strbuf.h b/reftable/strbuf.h
new file mode 100644
index 0000000000..c2d7aca8dd
--- /dev/null
+++ b/reftable/strbuf.h
@@ -0,0 +1,80 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef SLICE_H
+#define SLICE_H
+
+#ifdef REFTABLE_STANDALONE
+
+#include "basics.h"
+
+/*
+  Provides a bounds-checked, growable byte ranges. To use, initialize as "strbuf
+  x = STRBUF_INIT;"
+ */
+struct strbuf {
+	size_t len;
+	size_t cap;
+	char *buf;
+
+	/* Used to enforce initialization with STRBUF_INIT */
+	uint8_t canary;
+};
+
+#define STRBUF_CANARY 0x42
+#define STRBUF_INIT                       \
+	{                                 \
+		0, 0, NULL, STRBUF_CANARY \
+	}
+
+void strbuf_addstr(struct strbuf *dest, const char *src);
+
+/* Deallocate and clear strbuf */
+void strbuf_release(struct strbuf *strbuf);
+
+/* Set strbuf to 0 length, but retain buffer. */
+void strbuf_reset(struct strbuf *strbuf);
+
+/* Initializes a strbuf. Accepts a strbuf with random garbage. */
+void strbuf_init(struct strbuf *strbuf, size_t alloc);
+
+/* Return `buf`, clearing out `s`. Optionally return len (not cap) in `sz`.  */
+char *strbuf_detach(struct strbuf *s, size_t *sz);
+
+/* Set length of the slace to `l`, but don't reallocated. */
+void strbuf_setlen(struct strbuf *s, size_t l);
+
+/* Ensure `l` bytes beyond current length are available */
+void strbuf_grow(struct strbuf *s, size_t l);
+
+/* Signed comparison */
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b);
+
+/* Append `data` to the `dest` strbuf.  */
+int strbuf_add(struct strbuf *dest, const void *data, size_t sz);
+
+/* Append `add` to `dest. */
+void strbuf_addbuf(struct strbuf *dest, struct strbuf *add);
+
+#else
+
+#include "../git-compat-util.h"
+#include "../strbuf.h"
+
+#endif
+
+extern struct strbuf reftable_empty_strbuf;
+
+/* Like strbuf_add, but suitable for passing to reftable_new_writer
+ */
+int strbuf_add_void(void *b, const void *data, size_t sz);
+
+/* Find the longest shared prefix size of `a` and `b` */
+int common_prefix_size(struct strbuf *a, struct strbuf *b);
+
+#endif
diff --git a/reftable/strbuf_test.c b/reftable/strbuf_test.c
new file mode 100644
index 0000000000..39f561c81a
--- /dev/null
+++ b/reftable/strbuf_test.c
@@ -0,0 +1,37 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#include "strbuf.h"
+
+#include "system.h"
+
+#include "basics.h"
+#include "test_framework.h"
+#include "reftable-tests.h"
+
+static void test_strbuf(void)
+{
+	struct strbuf s = STRBUF_INIT;
+	struct strbuf t = STRBUF_INIT;
+
+	strbuf_addstr(&s, "abc");
+	assert(0 == strcmp("abc", s.buf));
+
+	strbuf_addstr(&t, "pqr");
+	strbuf_addbuf(&s, &t);
+	assert(0 == strcmp("abcpqr", s.buf));
+
+	strbuf_release(&s);
+	strbuf_release(&t);
+}
+
+int strbuf_test_main(int argc, const char *argv[])
+{
+	add_test_case("test_strbuf", &test_strbuf);
+	return test_main(argc, argv);
+}
diff --git a/reftable/system.h b/reftable/system.h
new file mode 100644
index 0000000000..567eb8a87d
--- /dev/null
+++ b/reftable/system.h
@@ -0,0 +1,51 @@ 
+/*
+Copyright 2020 Google LLC
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file or at
+https://developers.google.com/open-source/licenses/bsd
+*/
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#ifndef REFTABLE_STANDALONE
+
+#include "git-compat-util.h"
+#include "cache.h"
+#include <zlib.h>
+
+#else
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <zlib.h>
+
+#include "compat.h"
+
+#endif /* REFTABLE_STANDALONE */
+
+void reftable_clear_dir(const char *dirname);
+
+#define SHA1_ID 0x73686131
+#define SHA256_ID 0x73323536
+#define SHA1_SIZE 20
+#define SHA256_SIZE 32
+
+/* This is uncompress2, which is only available in zlib as of 2017.
+ */
+int uncompress_return_consumed(Bytef *dest, uLongf *destLen,
+			       const Bytef *source, uLong *sourceLen);
+int hash_size(uint32_t id);
+
+#endif
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
new file mode 100644
index 0000000000..7d50aa6bcc
--- /dev/null
+++ b/t/helper/test-reftable.c
@@ -0,0 +1,8 @@ 
+#include "reftable/reftable-tests.h"
+#include "test-tool.h"
+
+int cmd__reftable(int argc, const char **argv)
+{
+	strbuf_test_main(argc, argv);
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index a0d3966b29..c7ab5bc4a6 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -53,6 +53,7 @@  static struct test_cmd cmds[] = {
 	{ "read-graph", cmd__read_graph },
 	{ "read-midx", cmd__read_midx },
 	{ "ref-store", cmd__ref_store },
+	{ "reftable", cmd__reftable },
 	{ "regex", cmd__regex },
 	{ "repository", cmd__repository },
 	{ "revision-walking", cmd__revision_walking },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 07034d3f38..fa2b11ace6 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -42,6 +42,7 @@  int cmd__read_cache(int argc, const char **argv);
 int cmd__read_graph(int argc, const char **argv);
 int cmd__read_midx(int argc, const char **argv);
 int cmd__ref_store(int argc, const char **argv);
+int cmd__reftable(int argc, const char **argv);
 int cmd__regex(int argc, const char **argv);
 int cmd__repository(int argc, const char **argv);
 int cmd__revision_walking(int argc, const char **argv);