
[v2,06/16] refs/files: batch refname availability checks for normal transactions

Message ID 20250219-pks-update-ref-optimization-v2-6-e696e7220b22@pks.im (mailing list archive)
State New
Series refs: batch refname availability checks

Commit Message

Patrick Steinhardt Feb. 19, 2025, 1:23 p.m. UTC
Just as with the "reftable" backend, which we have adapted in the
preceding commit to use batched refname availability checks, we can do
the same for the "files" backend. Things are a bit more intricate here,
though, as we call `refs_verify_refname_available()` in a set of
different contexts:

  1. `lock_raw_ref()` when it hits either EEXIST or EISDIR when creating
     a new reference, mostly to create a nice, user-readable error
     message. This is nothing we have to worry about much, as we only
     hit this code path at most once, namely when we run into a conflict.

  2. `lock_raw_ref()` when it _could_ create the lockfile, to check
     whether the new reference conflicts with any packed refs. In the
     general case, this code path is hit once for every (successful)
     reference update.

  3. `lock_ref_oid_basic()`, but it is only executed when copying or
     renaming references or when expiring reflogs. It will thus not be
     called in contexts where we have many references queued up.

  4. `refs_rename_ref_available()`, but again only when copying or
     renaming references. It is thus uninteresting for the same reason
     as the previous case.

  5. `files_transaction_finish_initial()`, which is only executed when
     creating a new repository or migrating references.

Out of these, only (2) and (5) are viable candidates for the batched
checks; the sketch below contrasts the unbatched and the batched call
pattern.
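
The following illustrative sketch contrasts the two patterns. It is not
part of the patch: it reuses the signatures visible in this series, but
the surrounding loop is simplified and error handling is omitted.

    /*
     * Unbatched: one availability check per queued update, each of
     * which has to consult the packed-refs store on its own.
     */
    for (i = 0; i < transaction->nr; i++)
            if (refs_verify_refname_available(refs->packed_ref_store,
                                              transaction->updates[i]->refname,
                                              &affected_refnames, NULL, 0, err))
                    return TRANSACTION_NAME_CONFLICT;

    /*
     * Batched: queue up all names first, then verify them in one go,
     * which gives the backend the chance to amortize work across all
     * queued names.
     */
    for (i = 0; i < transaction->nr; i++)
            string_list_insert(&refnames_to_check,
                               transaction->updates[i]->refname);
    if (refs_verify_refnames_available(refs->packed_ref_store,
                                       &refnames_to_check,
                                       &affected_refnames, NULL, 0, err))
            return TRANSACTION_NAME_CONFLICT;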

Adapt `lock_raw_ref()` accordingly by queueing up reference names that
need to be checked for availability and then checking them after we have
processed all updates. This check is done before we (optionally) lock
the `packed-refs` file, which is somewhat flawed because it means that
the `packed-refs` file could still change after the availability check
and thus create an undetected conflict, e.g. when a concurrent process
packs a newly created reference that conflicts with one of ours. But
unconditionally locking the file would change semantics that users are
likely to rely on, so we keep the current locking sequence intact, even
if it is suboptimal.
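
The resulting ordering in `files_transaction_prepare()` can be condensed
as follows. This is a sketch of the flow after this patch, not a literal
excerpt; error handling and unrelated steps are omitted.

    /* Lock all refs; lock_raw_ref() queues to-be-created names. */
    for (i = 0; i < transaction->nr; i++)
            lock_ref_for_update(refs, transaction->updates[i], transaction,
                                head_ref, &refnames_to_check,
                                &affected_refnames, err);

    /* One batched availability check against the packed-refs store... */
    if (refs_verify_refnames_available(refs->packed_ref_store,
                                       &refnames_to_check,
                                       &affected_refnames, NULL, 0, err))
            return TRANSACTION_NAME_CONFLICT;

    /*
     * ...but the packed-refs file is not locked yet, so a concurrent
     * writer may still introduce a conflicting packed ref in this
     * window.
     */

    if (packed_transaction)
            packed_refs_lock(refs->packed_ref_store, 0, err);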

The refactoring of `files_transaction_finish_initial()` will be done in
the next commit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/files-backend.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

Patch

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 29f08dced40..6ce79cf0791 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -678,6 +678,7 @@  static void unlock_ref(struct ref_lock *lock)
  */
 static int lock_raw_ref(struct files_ref_store *refs,
 			const char *refname, int mustexist,
+			struct string_list *refnames_to_check,
 			const struct string_list *extras,
 			struct ref_lock **lock_p,
 			struct strbuf *referent,
@@ -855,16 +856,11 @@  static int lock_raw_ref(struct files_ref_store *refs,
 		}
 
 		/*
-		 * If the ref did not exist and we are creating it,
-		 * make sure there is no existing packed ref that
-		 * conflicts with refname:
+		 * If the ref did not exist and we are creating it, we have to
+		 * make sure there is no existing packed ref that conflicts
+		 * with refname. This check is deferred so that we can batch it.
 		 */
-		if (refs_verify_refname_available(
-				    refs->packed_ref_store, refname,
-				    extras, NULL, 0, err)) {
-			ret = TRANSACTION_NAME_CONFLICT;
-			goto error_return;
-		}
+		string_list_insert(refnames_to_check, refname);
 	}
 
 	ret = 0;
@@ -2569,6 +2565,7 @@  static int lock_ref_for_update(struct files_ref_store *refs,
 			       struct ref_update *update,
 			       struct ref_transaction *transaction,
 			       const char *head_ref,
+			       struct string_list *refnames_to_check,
 			       struct string_list *affected_refnames,
 			       struct strbuf *err)
 {
@@ -2597,7 +2594,7 @@  static int lock_ref_for_update(struct files_ref_store *refs,
 		lock->count++;
 	} else {
 		ret = lock_raw_ref(refs, update->refname, mustexist,
-				   affected_refnames,
+				   refnames_to_check, affected_refnames,
 				   &lock, &referent,
 				   &update->type, err);
 		if (ret) {
@@ -2811,6 +2808,7 @@  static int files_transaction_prepare(struct ref_store *ref_store,
 	size_t i;
 	int ret = 0;
 	struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
+	struct string_list refnames_to_check = STRING_LIST_INIT_NODUP;
 	char *head_ref = NULL;
 	int head_type;
 	struct files_transaction_backend_data *backend_data;
@@ -2898,7 +2896,8 @@  static int files_transaction_prepare(struct ref_store *ref_store,
 		struct ref_update *update = transaction->updates[i];
 
 		ret = lock_ref_for_update(refs, update, transaction,
-					  head_ref, &affected_refnames, err);
+					  head_ref, &refnames_to_check,
+					  &affected_refnames, err);
 		if (ret)
 			goto cleanup;
 
@@ -2930,6 +2929,26 @@  static int files_transaction_prepare(struct ref_store *ref_store,
 		}
 	}
 
+	/*
+	 * Verify that none of the loose references that we're about to write
+	 * conflict with any existing packed references. Ideally, we'd do this
+	 * check after the packed-refs are locked so that the file cannot
+	 * change underneath our feet. But introducing such a lock now would
+	 * probably do more harm than good as users rely on there not being a
+	 * global lock with the "files" backend.
+	 *
+	 * Another alternative would be to do the check after the (optional)
+	 * lock, but that would extend the time we spend in the globally-locked
+	 * state.
+	 *
+	 * So instead, we accept the race for now.
+	 */
+	if (refs_verify_refnames_available(refs->packed_ref_store, &refnames_to_check,
+					   &affected_refnames, NULL, 0, err)) {
+		ret = TRANSACTION_NAME_CONFLICT;
+		goto cleanup;
+	}
+
 	if (packed_transaction) {
 		if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
 			ret = TRANSACTION_GENERIC_ERROR;
@@ -2972,6 +2991,7 @@  static int files_transaction_prepare(struct ref_store *ref_store,
 cleanup:
 	free(head_ref);
 	string_list_clear(&affected_refnames, 0);
+	string_list_clear(&refnames_to_check, 0);
 
 	if (ret)
 		files_transaction_cleanup(refs, transaction);
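
A side note on the data structure: the queued names go through
`string_list_insert()`, which keeps the list sorted and free of
duplicates, so the batched check sees each refname at most once and in
a deterministic order. A minimal standalone sketch of that behaviour,
using Git's own string-list API with made-up example names:

    #include "git-compat-util.h"
    #include "string-list.h"

    static void queue_refnames_example(void)
    {
            struct string_list refnames_to_check = STRING_LIST_INIT_NODUP;

            string_list_insert(&refnames_to_check, "refs/heads/b");
            string_list_insert(&refnames_to_check, "refs/heads/a");
            /* Inserting an already-present entry is a no-op. */
            string_list_insert(&refnames_to_check, "refs/heads/b");

            /* refnames_to_check.nr == 2, sorted as "a", then "b". */

            string_list_clear(&refnames_to_check, 0);
    }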