diff mbox series

[v2,3/6] bundle-uri: create basic file-copy logic

Message ID abec47564fd9cea5ef9d8f112e90368681a4e066.1656535245.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series bundle URIs: design doc and initial git fetch --bundle-uri implementation | expand

Commit Message

Derrick Stolee June 29, 2022, 8:40 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

Before implementing a way to fetch bundles into a repository, create the
basic logic. Assume that the URI is actually a file path. Future changes
will extend this logic to handle other protocols.

For now, we also only succeed if the content at the URI is a bundle
file, not a bundle list. Bundle lists will be implemented in a future
change.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Makefile     |  1 +
 bundle-uri.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 bundle-uri.h | 14 ++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 bundle-uri.c
 create mode 100644 bundle-uri.h

Comments

Josh Steadmon July 21, 2022, 9:45 p.m. UTC | #1
On 2022.06.29 20:40, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
> 
> Before implementing a way to fetch bundles into a repository, create the
> basic logic. Assume that the URI is actually a file path. Future logic
> will make this more careful to other protocols.
> 
> For now, we also only succeed if the content at the URI is a bundle
> file, not a bundle list. Bundle lists will be implemented in a future
> change.
> 
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  Makefile     |  1 +
>  bundle-uri.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  bundle-uri.h | 14 ++++++++
>  3 files changed, 108 insertions(+)
>  create mode 100644 bundle-uri.c
>  create mode 100644 bundle-uri.h
> 
> diff --git a/Makefile b/Makefile
> index f8bccfab5e9..8f27310836d 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -887,6 +887,7 @@ LIB_OBJS += blob.o
>  LIB_OBJS += bloom.o
>  LIB_OBJS += branch.o
>  LIB_OBJS += bulk-checkin.o
> +LIB_OBJS += bundle-uri.o
>  LIB_OBJS += bundle.o
>  LIB_OBJS += cache-tree.o
>  LIB_OBJS += cbtree.o
> diff --git a/bundle-uri.c b/bundle-uri.c
> new file mode 100644
> index 00000000000..f0abbf434c8
> --- /dev/null
> +++ b/bundle-uri.c
> @@ -0,0 +1,93 @@
> +#include "cache.h"
> +#include "bundle-uri.h"
> +#include "bundle.h"
> +#include "object-store.h"
> +#include "refs.h"
> +#include "run-command.h"
> +
> +static void find_temp_filename(struct strbuf *name)
> +{
> +	int fd;
> +	/*
> +	 * Find a temporary filename that is available. This is briefly
> +	 * racy, but unlikely to collide.
> +	 */
> +	fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX");
> +	if (fd < 0)
> +		die(_("failed to create temporary file"));
> +	close(fd);
> +	unlink(name->buf);

Is there a reason why we unlink() here? If we allow the empty file to
remain on-disk until we write to it, wouldn't that prevent odb_mkstemp()
from being racy?


> +}
> +
> +static int copy_uri_to_file(const char *uri, const char *file)

Nitpick: from a brief glance, it seems that most other copy* functions
take the destination as the first parameter, and the source second. I
don't feel strongly about it, because to me src followed by dst feels
more natural, but perhaps we should be consistent with other functions.


> +{
> +	/* Copy as a file */
> +	return copy_file(file, uri, 0444);
> +}
> +
> +static int unbundle_from_file(struct repository *r, const char *file)
> +{
> +	int result = 0;
> +	int bundle_fd;
> +	struct bundle_header header = BUNDLE_HEADER_INIT;
> +	struct strvec extra_index_pack_args = STRVEC_INIT;
> +	struct string_list_item *refname;
> +	struct strbuf bundle_ref = STRBUF_INIT;
> +	size_t bundle_prefix_len;
> +
> +	if ((bundle_fd = read_bundle_header(file, &header)) < 0)
> +		return 1;
> +
> +	if ((result = unbundle(r, &header, bundle_fd, &extra_index_pack_args)))

Can we just pass NULL here instead of creating an empty
extra_index_pack_args?


> +		return 1;
> +
> +	/*
> +	 * Convert all refs/heads/ from the bundle into refs/bundles/
> +	 * in the local repository.
> +	 */
> +	strbuf_addstr(&bundle_ref, "refs/bundles/");
> +	bundle_prefix_len = bundle_ref.len;
> +
> +	for_each_string_list_item(refname, &header.references) {
> +		struct object_id *oid = refname->util;
> +		struct object_id old_oid;
> +		const char *branch_name;
> +		int has_old;
> +
> +		if (!skip_prefix(refname->string, "refs/heads/", &branch_name))
> +			continue;
> +
> +		strbuf_setlen(&bundle_ref, bundle_prefix_len);
> +		strbuf_addstr(&bundle_ref, branch_name);
> +
> +		has_old = !read_ref(bundle_ref.buf, &old_oid);
> +		update_ref("fetched bundle", bundle_ref.buf, oid,
> +			   has_old ? &old_oid : NULL,
> +			   REF_SKIP_OID_VERIFICATION,
> +			   UPDATE_REFS_MSG_ON_ERR);
> +	}
> +
> +	bundle_header_release(&header);

We also need to release bundle_ref (and extra_index_pack_args if we
decide to keep it).


> +	return result;
> +}
> +
> +int fetch_bundle_uri(struct repository *r, const char *uri)
> +{
> +	int result = 0;
> +	struct strbuf filename = STRBUF_INIT;
> +
> +	find_temp_filename(&filename);
> +	if ((result = copy_uri_to_file(uri, filename.buf)))
> +		goto cleanup;
> +
> +	if ((result = !is_bundle(filename.buf, 0)))
> +		goto cleanup;
> +
> +	if ((result = unbundle_from_file(r, filename.buf)))
> +		goto cleanup;
> +
> +cleanup:
> +	unlink(filename.buf);
> +	strbuf_release(&filename);
> +	return result;
> +}
> diff --git a/bundle-uri.h b/bundle-uri.h
> new file mode 100644
> index 00000000000..8a152f1ef14
> --- /dev/null
> +++ b/bundle-uri.h
> @@ -0,0 +1,14 @@
> +#ifndef BUNDLE_URI_H
> +#define BUNDLE_URI_H
> +
> +struct repository;
> +
> +/**
> + * Fetch data from the given 'uri' and unbundle the bundle data found
> + * based on that information.
> + *
> + * Returns non-zero if no bundle information is found at the given 'uri'.
> + */
> +int fetch_bundle_uri(struct repository *r, const char *uri);
> +
> +#endif
> -- 
> gitgitgadget
>
Derrick Stolee July 22, 2022, 1:18 p.m. UTC | #2
On 7/21/2022 5:45 PM, Josh Steadmon wrote:
> On 2022.06.29 20:40, Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <derrickstolee@github.com>
>> +static void find_temp_filename(struct strbuf *name)
>> +{
>> +	int fd;
>> +	/*
>> +	 * Find a temporary filename that is available. This is briefly
>> +	 * racy, but unlikely to collide.
>> +	 */
>> +	fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX");
>> +	if (fd < 0)
>> +		die(_("failed to create temporary file"));
>> +	close(fd);
>> +	unlink(name->buf);
> 
> Is there a reason why we unlink() here? If we allow the empty file to
> remain on-disk until we write to it, wouldn't that prevent odb_mkstemp()
> from being racy?

I still need to test this, but that should work. Thanks!

>> +static int copy_uri_to_file(const char *uri, const char *file)
> 
> Nitpick: from a brief glance, it seems that most other copy* functions
> take the destination as the first parameter, and the source second. I
> don't feel strongly about it, because to me src followed by dst feels
> more natural, but perhaps we should be consistent with other functions.

Yeah, this is definitely my personal opinion: (src, dst) feels more
natural to me, and I need to do a mental swap whenever dealing with the
standard methods. However, it's best to be consistent, and...
 
> 
>> +{
>> +	/* Copy as a file */
>> +	return copy_file(file, uri, 0444);

...we have exactly that standard usage right here.

>> +	if ((result = unbundle(r, &header, bundle_fd, &extra_index_pack_args)))
> 
> Can we just pass NULL here instead of creating an empty
> extra_index_pack_args?

This isn't the first time I've populated an option instead of just
passing NULL. I'll work on fixing that bad habit.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index f8bccfab5e9..8f27310836d 100644
--- a/Makefile
+++ b/Makefile
@@ -887,6 +887,7 @@  LIB_OBJS += blob.o
 LIB_OBJS += bloom.o
 LIB_OBJS += branch.o
 LIB_OBJS += bulk-checkin.o
+LIB_OBJS += bundle-uri.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
 LIB_OBJS += cbtree.o
diff --git a/bundle-uri.c b/bundle-uri.c
new file mode 100644
index 00000000000..f0abbf434c8
--- /dev/null
+++ b/bundle-uri.c
@@ -0,0 +1,121 @@ 
+#include "cache.h"
+#include "bundle-uri.h"
+#include "bundle.h"
+#include "object-store.h"
+#include "refs.h"
+#include "run-command.h"
+
+/*
+ * Store in 'name' a temporary path obtained from odb_mkstemp(). Only the
+ * name is kept: the file that odb_mkstemp() creates is closed and removed
+ * again before returning. Dies if no temporary file can be created.
+ */
+static void find_temp_filename(struct strbuf *name)
+{
+	int fd;
+	/*
+	 * Find a temporary filename that is available. This is briefly
+	 * racy, but unlikely to collide.
+	 */
+	fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX");
+	if (fd < 0)
+		die(_("failed to create temporary file"));
+	close(fd);
+	unlink(name->buf);
+}
+
+/*
+ * Copy the content at 'uri' into 'file'. For now 'uri' is assumed to be a
+ * path on the local filesystem. Returns the result of copy_file().
+ */
+static int copy_uri_to_file(const char *uri, const char *file)
+{
+	/* Copy as a file; copy_file() takes the destination first. */
+	return copy_file(file, uri, 0444);
+}
+
+/*
+ * Unbundle the bundle stored at 'file' into 'r', then record each of its
+ * refs/heads/ references under refs/bundles/ in the local repository.
+ *
+ * Returns 0 on success and 1 if the bundle header cannot be read or the
+ * unbundle step fails. The result of the individual update_ref() calls
+ * is not reflected in the return value.
+ */
+static int unbundle_from_file(struct repository *r, const char *file)
+{
+	int result = 0;
+	int bundle_fd;
+	struct bundle_header header = BUNDLE_HEADER_INIT;
+	struct string_list_item *refname;
+	struct strbuf bundle_ref = STRBUF_INIT;
+	size_t bundle_prefix_len;
+
+	if ((bundle_fd = read_bundle_header(file, &header)) < 0) {
+		result = 1;
+		goto cleanup;
+	}
+
+	/* No extra index-pack arguments are needed, so pass NULL. */
+	if (unbundle(r, &header, bundle_fd, NULL)) {
+		result = 1;
+		goto cleanup;
+	}
+
+	/*
+	 * Convert all refs/heads/ from the bundle into refs/bundles/
+	 * in the local repository.
+	 */
+	strbuf_addstr(&bundle_ref, "refs/bundles/");
+	bundle_prefix_len = bundle_ref.len;
+
+	for_each_string_list_item(refname, &header.references) {
+		struct object_id *oid = refname->util;
+		struct object_id old_oid;
+		const char *branch_name;
+		int has_old;
+
+		if (!skip_prefix(refname->string, "refs/heads/", &branch_name))
+			continue;
+
+		/* Reuse the buffer: trim back to the prefix, append the name. */
+		strbuf_setlen(&bundle_ref, bundle_prefix_len);
+		strbuf_addstr(&bundle_ref, branch_name);
+
+		has_old = !read_ref(bundle_ref.buf, &old_oid);
+		update_ref("fetched bundle", bundle_ref.buf, oid,
+			   has_old ? &old_oid : NULL,
+			   REF_SKIP_OID_VERIFICATION,
+			   UPDATE_REFS_MSG_ON_ERR);
+	}
+
+cleanup:
+	/* Release on every path, including the early failures above. */
+	strbuf_release(&bundle_ref);
+	bundle_header_release(&header);
+	return result;
+}
+
+/*
+ * Copy 'uri' to a temporary file, check that it is a bundle, and unbundle
+ * it into 'r'. The temporary file is removed before returning.
+ */
+int fetch_bundle_uri(struct repository *r, const char *uri)
+{
+	int result = 0;
+	struct strbuf filename = STRBUF_INIT;
+
+	find_temp_filename(&filename);
+	if ((result = copy_uri_to_file(uri, filename.buf)))
+		goto cleanup;
+
+	if ((result = !is_bundle(filename.buf, 0)))
+		goto cleanup;
+
+	result = unbundle_from_file(r, filename.buf);
+
+cleanup:
+	unlink(filename.buf);
+	strbuf_release(&filename);
+	return result;
+}
diff --git a/bundle-uri.h b/bundle-uri.h
new file mode 100644
index 00000000000..8a152f1ef14
--- /dev/null
+++ b/bundle-uri.h
@@ -0,0 +1,14 @@ 
+#ifndef BUNDLE_URI_H
+#define BUNDLE_URI_H
+
+struct repository;
+
+/**
+ * Copy the bundle at 'uri' (for now assumed to be a local file path) to a
+ * temporary file and unbundle it into 'r'.
+ *
+ * Returns zero on success; non-zero if no usable bundle is found at 'uri'.
+ */
+int fetch_bundle_uri(struct repository *r, const char *uri);
+
+#endif