mbox series

[v5,0/5] Reftable support git-core

Message ID pull.539.v5.git.1581344060.gitgitgadget@gmail.com (mailing list archive)
Headers show
Series Reftable support git-core | expand

Message

Linus Arver via GitGitGadget Feb. 10, 2020, 2:14 p.m. UTC
This adds the reftable library, and hooks it up as a ref backend.

At this point, I am mainly interested in feedback on the spots marked with
XXX in the Git source code.

v3

 * passes gitgitgadget CI.

Han-Wen Nienhuys (5):
  refs.h: clarify reflog iteration order
  create .git/refs in files-backend.c
  refs: document how ref_iterator_advance_fn should handle symrefs
  Add reftable library
  Reftable support for git-core

 .../technical/repository-version.txt          |    7 +
 Makefile                                      |   24 +-
 builtin/clone.c                               |    4 +-
 builtin/init-db.c                             |   57 +-
 cache.h                                       |    4 +-
 refs.c                                        |   20 +-
 refs.h                                        |    8 +-
 refs/files-backend.c                          |    6 +
 refs/refs-internal.h                          |    6 +
 refs/reftable-backend.c                       |  880 +++++++++++++
 reftable/LICENSE                              |   31 +
 reftable/README.md                            |   19 +
 reftable/VERSION                              |    5 +
 reftable/basics.c                             |  196 +++
 reftable/basics.h                             |   37 +
 reftable/block.c                              |  401 ++++++
 reftable/block.h                              |   71 ++
 reftable/blocksource.h                        |   20 +
 reftable/bytes.c                              |    0
 reftable/config.h                             |    1 +
 reftable/constants.h                          |   27 +
 reftable/dump.c                               |   97 ++
 reftable/file.c                               |   97 ++
 reftable/iter.c                               |  229 ++++
 reftable/iter.h                               |   56 +
 reftable/merged.c                             |  286 +++++
 reftable/merged.h                             |   34 +
 reftable/pq.c                                 |  114 ++
 reftable/pq.h                                 |   34 +
 reftable/reader.c                             |  708 +++++++++++
 reftable/reader.h                             |   52 +
 reftable/record.c                             | 1107 +++++++++++++++++
 reftable/record.h                             |   79 ++
 reftable/reftable.h                           |  394 ++++++
 reftable/slice.c                              |  199 +++
 reftable/slice.h                              |   39 +
 reftable/stack.c                              |  983 +++++++++++++++
 reftable/stack.h                              |   40 +
 reftable/system.h                             |   58 +
 reftable/tree.c                               |   66 +
 reftable/tree.h                               |   24 +
 reftable/writer.c                             |  623 ++++++++++
 reftable/writer.h                             |   46 +
 reftable/zlib-compat.c                        |   92 ++
 repository.c                                  |    2 +
 repository.h                                  |    3 +
 setup.c                                       |   12 +-
 47 files changed, 7266 insertions(+), 32 deletions(-)
 create mode 100644 refs/reftable-backend.c
 create mode 100644 reftable/LICENSE
 create mode 100644 reftable/README.md
 create mode 100644 reftable/VERSION
 create mode 100644 reftable/basics.c
 create mode 100644 reftable/basics.h
 create mode 100644 reftable/block.c
 create mode 100644 reftable/block.h
 create mode 100644 reftable/blocksource.h
 create mode 100644 reftable/bytes.c
 create mode 100644 reftable/config.h
 create mode 100644 reftable/constants.h
 create mode 100644 reftable/dump.c
 create mode 100644 reftable/file.c
 create mode 100644 reftable/iter.c
 create mode 100644 reftable/iter.h
 create mode 100644 reftable/merged.c
 create mode 100644 reftable/merged.h
 create mode 100644 reftable/pq.c
 create mode 100644 reftable/pq.h
 create mode 100644 reftable/reader.c
 create mode 100644 reftable/reader.h
 create mode 100644 reftable/record.c
 create mode 100644 reftable/record.h
 create mode 100644 reftable/reftable.h
 create mode 100644 reftable/slice.c
 create mode 100644 reftable/slice.h
 create mode 100644 reftable/stack.c
 create mode 100644 reftable/stack.h
 create mode 100644 reftable/system.h
 create mode 100644 reftable/tree.c
 create mode 100644 reftable/tree.h
 create mode 100644 reftable/writer.c
 create mode 100644 reftable/writer.h
 create mode 100644 reftable/zlib-compat.c


base-commit: 5b0ca878e008e82f91300091e793427205ce3544
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-539%2Fhanwen%2Freftable-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-539/hanwen/reftable-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/539

Range-diff vs v4:

 1:  c00403c94d = 1:  c00403c94d refs.h: clarify reflog iteration order
 2:  4d6da9bc47 = 2:  4d6da9bc47 create .git/refs in files-backend.c
 3:  fbdcdccc88 = 3:  fbdcdccc88 refs: document how ref_iterator_advance_fn should handle symrefs
 4:  02d2ca8b87 ! 4:  546b82fe79 Add reftable library
     @@ -98,11 +98,11 @@
       --- /dev/null
       +++ b/reftable/VERSION
      @@
     -+commit e7c3fc3099d9999bc8d895f84027b0e36348d5e6
     ++commit 6115b50fdb9bc662be39b05f5589bc109282ae7f
      +Author: Han-Wen Nienhuys <hanwen@google.com>
     -+Date:   Thu Feb 6 20:17:40 2020 +0100
     ++Date:   Mon Feb 10 13:59:52 2020 +0100
      +
     -+    C: use inttypes.h header definitions
     ++    README: add a note about the Java implementation
      
       diff --git a/reftable/basics.c b/reftable/basics.c
       new file mode 100644
     @@ -3885,9 +3885,6 @@
      +
      +#include "system.h"
      +
     -+typedef uint8_t byte;
     -+typedef byte bool;
     -+
      +/* block_source is a generic wrapper for a seekable readable file.
      +   It is generally passed around by value.
      + */
     @@ -3900,7 +3897,7 @@
      +   so it can return itself into the pool.
      +*/
      +struct block {
     -+	byte *data;
     ++	uint8_t *data;
      +	int len;
      +	struct block_source source;
      +};
     @@ -3926,14 +3923,14 @@
      +
      +/* write_options sets options for writing a single reftable. */
      +struct write_options {
     -+	/* do not pad out blocks to block size. */
     -+	bool unpadded;
     ++	/* boolean: do not pad out blocks to block size. */
     ++	int unpadded;
      +
      +	/* the blocksize. Should be less than 2^24. */
      +	uint32_t block_size;
      +
     -+	/* do not generate a SHA1 => ref index. */
     -+	bool skip_index_objects;
     ++	/* boolean: do not generate a SHA1 => ref index. */
     ++	int skip_index_objects;
      +
      +	/* how often to write complete keys in each block. */
      +	int restart_interval;
     @@ -3944,13 +3941,13 @@
      +	char *ref_name; /* Name of the ref, malloced. */
      +	uint64_t update_index; /* Logical timestamp at which this value is
      +				  written */
     -+	byte *value; /* SHA1, or NULL. malloced. */
     -+	byte *target_value; /* peeled annotated tag, or NULL. malloced. */
     ++	uint8_t *value; /* SHA1, or NULL. malloced. */
     ++	uint8_t *target_value; /* peeled annotated tag, or NULL. malloced. */
      +	char *target; /* symref, or NULL. malloced. */
      +};
      +
      +/* returns whether 'ref' represents a deletion */
     -+bool ref_record_is_deletion(const struct ref_record *ref);
     ++int ref_record_is_deletion(const struct ref_record *ref);
      +
      +/* prints a ref_record onto stdout */
      +void ref_record_print(struct ref_record *ref, int hash_size);
     @@ -3959,15 +3956,14 @@
      +void ref_record_clear(struct ref_record *ref);
      +
      +/* returns whether two ref_records are the same */
     -+bool ref_record_equal(struct ref_record *a, struct ref_record *b,
     -+		      int hash_size);
     ++int ref_record_equal(struct ref_record *a, struct ref_record *b, int hash_size);
      +
      +/* log_record holds a reflog entry */
      +struct log_record {
      +	char *ref_name;
      +	uint64_t update_index;
     -+	byte *new_hash;
     -+	byte *old_hash;
     ++	uint8_t *new_hash;
     ++	uint8_t *old_hash;
      +	char *name;
      +	char *email;
      +	uint64_t time;
     @@ -3976,14 +3972,13 @@
      +};
      +
      +/* returns whether 'ref' represents the deletion of a log record. */
     -+bool log_record_is_deletion(const struct log_record *log);
     ++int log_record_is_deletion(const struct log_record *log);
      +
      +/* frees and nulls all pointer values. */
      +void log_record_clear(struct log_record *log);
      +
      +/* returns whether two records are equal. */
     -+bool log_record_equal(struct log_record *a, struct log_record *b,
     -+		      int hash_size);
     ++int log_record_equal(struct log_record *a, struct log_record *b, int hash_size);
      +
      +void log_record_print(struct log_record *log, int hash_size);
      +
     @@ -4076,11 +4071,11 @@
      +const char *error_str(int err);
      +
      +/* new_writer creates a new writer */
     -+struct writer *new_writer(int (*writer_func)(void *, byte *, int),
     ++struct writer *new_writer(int (*writer_func)(void *, uint8_t *, int),
      +			  void *writer_arg, struct write_options *opts);
      +
      +/* write to a file descriptor. fdp should be an int* pointing to the fd. */
     -+int fd_writer(void *fdp, byte *data, int size);
     ++int fd_writer(void *fdp, uint8_t *data, int size);
      +
      +/* Set the range of update indices for the records we will add.  When
      +   writing a table into a stack, the min should be at least
     @@ -4163,7 +4158,7 @@
      +void reader_free(struct reader *);
      +
      +/* return an iterator for the refs pointing to oid */
     -+int reader_refs_for(struct reader *r, struct iterator *it, byte *oid,
     ++int reader_refs_for(struct reader *r, struct iterator *it, uint8_t *oid,
      +		    int oid_len);
      +
      +/* return the max_update_index for a table */
     @@ -5613,6 +5608,9 @@
      +	}
      +#endif
      +
     ++typedef uint8_t byte;
     ++typedef int bool;
     ++
      +int uncompress_return_consumed(Bytef *dest, uLongf *destLen,
      +			       const Bytef *source, uLong *sourceLen);
      +
 5:  2786a6bf61 ! 5:  702fb89871 Reftable support for git-core
     @@ -41,6 +41,21 @@
          Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
          Co-authored-by: Jeff King <peff@peff.net>
      
     + diff --git a/Documentation/technical/repository-version.txt b/Documentation/technical/repository-version.txt
     + --- a/Documentation/technical/repository-version.txt
     + +++ b/Documentation/technical/repository-version.txt
     +@@
     + multiple working directory mode, "config" file is shared while
     + "config.worktree" is per-working directory (i.e., it's in
     + GIT_COMMON_DIR/worktrees/<id>/config.worktree)
     ++
     ++==== `refStorage`
     ++
     ++Specifies the file format for the ref database. Values are `files`
     ++(for the traditional packed + loose ref format) and `reftable` for the
     ++binary reftable format. See https://github.com/google/reftable for
     ++more information.
     +
       diff --git a/Makefile b/Makefile
       --- a/Makefile
       +++ b/Makefile
     @@ -109,6 +124,21 @@ $^
       
       Documentation/GIT-EXCLUDED-PROGRAMS: FORCE
      
     + diff --git a/builtin/clone.c b/builtin/clone.c
     + --- a/builtin/clone.c
     + +++ b/builtin/clone.c
     +@@
     + 		}
     + 	}
     + 
     +-	init_db(git_dir, real_git_dir, option_template, INIT_DB_QUIET);
     ++	init_db(git_dir, real_git_dir, option_template,
     ++		DEFAULT_REF_STORAGE, /* XXX */
     ++		INIT_DB_QUIET);
     + 
     + 	if (real_git_dir)
     + 		git_dir = real_git_dir;
     +
       diff --git a/builtin/init-db.c b/builtin/init-db.c
       --- a/builtin/init-db.c
       +++ b/builtin/init-db.c
     @@ -117,7 +147,8 @@ $^
       
       static int create_default_files(const char *template_path,
      -				const char *original_git_dir)
     -+				const char *original_git_dir, int flags)
     ++				const char *original_git_dir,
     ++				const char *ref_storage_format, int flags)
       {
       	struct stat st1;
       	struct strbuf buf = STRBUF_INIT;
     @@ -125,8 +156,7 @@ $^
       	is_bare_repository_cfg = init_is_bare_repository;
       	if (init_shared_repository != -1)
       		set_shared_repository(init_shared_repository);
     -+	if (flags & INIT_DB_REFTABLE)
     -+		the_repository->ref_storage_format = xstrdup("reftable");
     ++	the_repository->ref_storage_format = xstrdup(ref_storage_format);
       
       	/*
       	 * We would have created the above under user's umask -- under
     @@ -167,17 +197,29 @@ $^
      -	xsnprintf(repo_version_string, sizeof(repo_version_string),
      -		  "%d", GIT_REPO_VERSION);
      +	xsnprintf(repo_version_string, sizeof(repo_version_string), "%d",
     -+		  flags & INIT_DB_REFTABLE ? GIT_REPO_VERSION_READ :
     -+					     GIT_REPO_VERSION);
     ++		  !strcmp(ref_storage_format, "reftable") ?
     ++			  GIT_REPO_VERSION_READ :
     ++			  GIT_REPO_VERSION);
       	git_config_set("core.repositoryformatversion", repo_version_string);
       
       	/* Check filemode trustability */
     +@@
     + }
     + 
     + int init_db(const char *git_dir, const char *real_git_dir,
     +-	    const char *template_dir, unsigned int flags)
     ++	    const char *template_dir, const char *ref_storage_format,
     ++	    unsigned int flags)
     + {
     + 	int reinit;
     + 	int exist_ok = flags & INIT_DB_EXIST_OK;
      @@
       	 */
       	check_repository_format();
       
      -	reinit = create_default_files(template_dir, original_git_dir);
     -+	reinit = create_default_files(template_dir, original_git_dir, flags);
     ++	reinit = create_default_files(template_dir, original_git_dir,
     ++				      ref_storage_format, flags);
       
       	create_object_directory();
       
     @@ -185,14 +227,18 @@ $^
       		git_config_set("receive.denyNonFastforwards", "true");
       	}
       
     -+	if (flags & INIT_DB_REFTABLE) {
     -+		git_config_set("extensions.refStorage", "reftable");
     -+	}
     ++	git_config_set("extensions.refStorage", ref_storage_format);
      +
       	if (!(flags & INIT_DB_QUIET)) {
       		int len = strlen(git_dir);
       
      @@
     + int cmd_init_db(int argc, const char **argv, const char *prefix)
     + {
     + 	const char *git_dir;
     ++	const char *ref_storage_format = DEFAULT_REF_STORAGE;
     + 	const char *real_git_dir = NULL;
     + 	const char *work_tree;
       	const char *template_dir = NULL;
       	unsigned int flags = 0;
       	const struct option init_db_options[] = {
     @@ -212,23 +258,38 @@ $^
      +		  N_("specify that the git repository is to be shared amongst several users"),
      +		  PARSE_OPT_OPTARG | PARSE_OPT_NONEG, shared_callback, 0 },
       		OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET),
     -+		OPT_BIT(0, "reftable", &flags, N_("use reftable"),
     -+			INIT_DB_REFTABLE),
     ++		OPT_STRING(0, "ref-storage", &ref_storage_format, N_("backend"),
     ++			   N_("the ref storage format to use")),
       		OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
       			   N_("separate git dir from working tree")),
       		OPT_END()
     +@@
     + 	}
     + 
     + 	UNLEAK(real_git_dir);
     ++	UNLEAK(ref_storage_format);
     + 	UNLEAK(git_dir);
     + 	UNLEAK(work_tree);
     + 
     + 	flags |= INIT_DB_EXIST_OK;
     +-	return init_db(git_dir, real_git_dir, template_dir, flags);
     ++	return init_db(git_dir, real_git_dir, template_dir, ref_storage_format,
     ++		       flags);
     + }
      
       diff --git a/cache.h b/cache.h
       --- a/cache.h
       +++ b/cache.h
      @@
     - 
     - #define INIT_DB_QUIET 0x0001
       #define INIT_DB_EXIST_OK 0x0002
     -+#define INIT_DB_REFTABLE 0x0004
       
       int init_db(const char *git_dir, const char *real_git_dir,
     - 	    const char *template_dir, unsigned int flags);
     +-	    const char *template_dir, unsigned int flags);
     ++	    const char *template_dir, const char *ref_storage_format,
     ++	    unsigned int flags);
     + 
     + void sanitize_stdfds(void);
     + int daemonize(void);
      @@
       	int is_bare;
       	int hash_algo;
     @@ -274,7 +335,7 @@ $^
      -	r->refs = ref_store_init(r->gitdir, REF_STORE_ALL_CAPS);
      +	r->refs = ref_store_init(r->gitdir,
      +				 r->ref_storage_format ? r->ref_storage_format :
     -+							 "files",
     ++							 DEFAULT_REF_STORAGE,
      +				 REF_STORE_ALL_CAPS);
       	return r->refs;
       }
     @@ -284,7 +345,7 @@ $^
       
       	/* assume that add_submodule_odb() has been called */
      -	refs = ref_store_init(submodule_sb.buf,
     -+	refs = ref_store_init(submodule_sb.buf, "files", /* XXX */
     ++	refs = ref_store_init(submodule_sb.buf, DEFAULT_REF_STORAGE, /* XXX */
       			      REF_STORE_READ | REF_STORE_ODB);
       	register_ref_store_map(&submodule_ref_stores, "submodule",
       			       refs, submodule);
     @@ -292,7 +353,7 @@ $^
       
       struct ref_store *get_worktree_ref_store(const struct worktree *wt)
       {
     -+	const char *format = "files"; /* XXX */
     ++	const char *format = DEFAULT_REF_STORAGE; /* XXX */
       	struct ref_store *refs;
       	const char *id;
       
     @@ -309,6 +370,20 @@ $^
       
       	if (refs)
      
     + diff --git a/refs.h b/refs.h
     + --- a/refs.h
     + +++ b/refs.h
     +@@
     + struct string_list_item;
     + struct worktree;
     + 
     ++/* XXX where should this be? */
     ++#define DEFAULT_REF_STORAGE "files"
     ++
     + /*
     +  * Resolve a reference, recursively following symbolic refererences.
     +  *
     +
       diff --git a/refs/refs-internal.h b/refs/refs-internal.h
       --- a/refs/refs-internal.h
       +++ b/refs/refs-internal.h