diff mbox series

[v2,08/15] midx: allow marking a pack as preferred

Message ID 223b89909416ec7c5505f9cedaa80bf86ecc7b2e.1614193703.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series midx: implement a multi-pack reverse index | expand

Commit Message

Taylor Blau Feb. 24, 2021, 7:09 p.m. UTC
When multiple packs in the multi-pack index contain the same object, the
MIDX machinery must make a choice about which pack it associates with
that object. Prior to this patch, the lowest-ordered[1] pack was always
selected.

Pack selection for duplicate objects is relatively unimportant today,
but it will become important for multi-pack bitmaps. This is because we
can only invoke the pack-reuse mechanism when all of the bits for reused
objects come from the reuse pack (in order to ensure that all reused
deltas can find their base objects in the same pack).

To encourage the pack selection process to prefer one pack over another
(the pack to be preferred is the one a caller would like to later use as
a reuse pack), introduce the concept of a "preferred pack". When
provided, the MIDX code will always prefer an object found in a
preferred pack over any other.

No format changes are required to store the preferred pack, since it
will be able to be inferred with a corresponding MIDX bitmap, by looking
up the pack associated with the object in the first bit position (this
ordering is described in detail in a subsequent commit).

[1]: the ordering is specified by MIDX internals; for our purposes we
can consider the "lowest ordered" pack to be "the one with the
most-recent mtime.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-multi-pack-index.txt       | 14 ++-
 Documentation/technical/multi-pack-index.txt |  5 +-
 builtin/multi-pack-index.c                   | 18 +++-
 builtin/repack.c                             |  2 +-
 midx.c                                       | 99 ++++++++++++++++++--
 midx.h                                       |  2 +-
 t/t5319-multi-pack-index.sh                  | 39 ++++++++
 7 files changed, 161 insertions(+), 18 deletions(-)

Comments

Jonathan Tan March 2, 2021, 4:17 a.m. UTC | #1
> @@ -589,12 +619,17 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>  				nth_midxed_pack_midx_entry(m,
>  							   &entries_by_fanout[nr_fanout],
>  							   cur_object);
> +				if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
> +					entries_by_fanout[nr_fanout].preferred = 1;
> +				else
> +					entries_by_fanout[nr_fanout].preferred = 0;
>  				nr_fanout++;
>  			}
>  		}
>  
>  		for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
>  			uint32_t start = 0, end;
> +			int preferred = cur_pack == preferred_pack;
>  
>  			if (cur_fanout)
>  				start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
> @@ -602,7 +637,11 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>  
>  			for (cur_object = start; cur_object < end; cur_object++) {
>  				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
> -				fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
> +				fill_pack_entry(cur_pack,
> +						info[cur_pack].p,
> +						cur_object,
> +						&entries_by_fanout[nr_fanout],
> +						preferred);
>  				nr_fanout++;
>  			}
>  		}

I was initially confused that "preferred" was set twice, but this makes
sense - the first one is when an existing midx is reused, and the second
one is for objects in packs that the midx (if it exists) does not cover.

> @@ -828,7 +869,19 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
>  	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
>  		goto cleanup;
>  
> -	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr);
> +	if (preferred_pack_name) {
> +		for (i = 0; i < ctx.nr; i++) {
> +			if (!cmp_idx_or_pack_name(preferred_pack_name,
> +						  ctx.info[i].pack_name)) {
> +				ctx.preferred_pack_idx = i;
> +				break;
> +			}
> +		}
> +	} else
> +		ctx.preferred_pack_idx = -1;

Looks safer to put "ctx.preferred_pack_idx = -1" before the "if", just
in case the given pack name does not exist?

> @@ -889,6 +942,31 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
>  			pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
>  	}
>  
> +	/*
> +	 * Recompute the preferred_pack_idx (if applicable) according to the
> +	 * permuted pack order.
> +	 */
> +	ctx.preferred_pack_idx = -1;
> +	if (preferred_pack_name) {
> +		ctx.preferred_pack_idx = lookup_idx_or_pack_name(ctx.info,
> +							     ctx.nr,
> +							     preferred_pack_name);
> +		if (ctx.preferred_pack_idx < 0)
> +			warning(_("unknown preferred pack: '%s'"),
> +				preferred_pack_name);
> +		else {
> +			uint32_t orig = ctx.info[ctx.preferred_pack_idx].orig_pack_int_id;
> +			uint32_t perm = ctx.pack_perm[orig];
> +
> +			if (perm == PACK_EXPIRED) {
> +				warning(_("preferred pack '%s' is expired"),
> +					preferred_pack_name);
> +				ctx.preferred_pack_idx = -1;
> +			} else
> +				ctx.preferred_pack_idx = perm;
> +		}
> +	}

I couldn't figure out why the preferred pack index needs to be
recalculated here, since the pack entries would have already been
sorted. Also, the tests still pass when I comment this part out. A
comment describing what's going on would be helpful.

All previous patches look good to me.
Taylor Blau March 2, 2021, 7:09 p.m. UTC | #2
On Mon, Mar 01, 2021 at 08:17:53PM -0800, Jonathan Tan wrote:
> I was initially confused that "preferred" was set twice, but this makes
> sense - the first one is when an existing midx is reused, and the second
> one is for objects in packs that the midx (if it exists) does not cover.

Yep. Those two paths permeate a lot of the MIDX writer code, since it
wants to reuse work from an existing MIDX if it can find one.

> > @@ -828,7 +869,19 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
> >  	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
> >  		goto cleanup;
> >
> > -	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr);
> > +	if (preferred_pack_name) {
> > +		for (i = 0; i < ctx.nr; i++) {
> > +			if (!cmp_idx_or_pack_name(preferred_pack_name,
> > +						  ctx.info[i].pack_name)) {
> > +				ctx.preferred_pack_idx = i;
> > +				break;
> > +			}
> > +		}
> > +	} else
> > +		ctx.preferred_pack_idx = -1;
>
> Looks safer to put "ctx.preferred_pack_idx = -1" before the "if", just
> in case the given pack name does not exist?

Agreed.

> > @@ -889,6 +942,31 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
> >  			pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
> >  	}
> >
> > +	/*
> > +	 * Recompute the preferred_pack_idx (if applicable) according to the
> > +	 * permuted pack order.
> > +	 */
> > +	ctx.preferred_pack_idx = -1;
> > +	if (preferred_pack_name) {
> > +		ctx.preferred_pack_idx = lookup_idx_or_pack_name(ctx.info,
> > +							     ctx.nr,
> > +							     preferred_pack_name);
> > +		if (ctx.preferred_pack_idx < 0)
> > +			warning(_("unknown preferred pack: '%s'"),
> > +				preferred_pack_name);
> > +		else {
> > +			uint32_t orig = ctx.info[ctx.preferred_pack_idx].orig_pack_int_id;
> > +			uint32_t perm = ctx.pack_perm[orig];
> > +
> > +			if (perm == PACK_EXPIRED) {
> > +				warning(_("preferred pack '%s' is expired"),
> > +					preferred_pack_name);
> > +				ctx.preferred_pack_idx = -1;
> > +			} else
> > +				ctx.preferred_pack_idx = perm;
> > +		}
> > +	}
>
> I couldn't figure out why the preferred pack index needs to be
> recalculated here, since the pack entries would have already been
> sorted. Also, the tests still pass when I comment this part out. A
> comment describing what's going on would be helpful.

Funny you mention that; I was wondering the same thing myself the other
day when reading these patches again before deploying them to a couple
of testing repositories at GitHub.

It is totally unnecessary: since we have already marked objects from the
preferred pack in get_sorted_entries(), the rest of the code doesn't
care if the preferred pack was permuted or not.

But we *do* care if the pack which was preferred expired. The 'git
repack --geometric --write-midx' caller (which will appear in a later
series) should never do that, so emitting a warning() is worthwhile. I
think ultimately you want something like this squashed in:

--- >8 ---

diff --git a/midx.c b/midx.c
index d2c56c4bc6..46f55ff6cf 100644
--- a/midx.c
+++ b/midx.c
@@ -582,7 +582,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 						  struct pack_info *info,
 						  uint32_t nr_packs,
 						  uint32_t *nr_objects,
-						  uint32_t preferred_pack)
+						  int preferred_pack)
 {
 	uint32_t cur_fanout, cur_pack, cur_object;
 	uint32_t alloc_fanout, alloc_objects, total_objects = 0;
@@ -869,6 +869,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
 		goto cleanup;

+	ctx.preferred_pack_idx = -1;
 	if (preferred_pack_name) {
 		for (i = 0; i < ctx.nr; i++) {
 			if (!cmp_idx_or_pack_name(preferred_pack_name,
@@ -877,8 +878,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 				break;
 			}
 		}
-	} else
-		ctx.preferred_pack_idx = -1;
+	}

 	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
 					 ctx.preferred_pack_idx);
@@ -942,28 +942,21 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 			pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
 	}

-	/*
-	 * Recompute the preferred_pack_idx (if applicable) according to the
-	 * permuted pack order.
-	 */
-	ctx.preferred_pack_idx = -1;
+	/* Check that the preferred pack wasn't expired (if given). */
 	if (preferred_pack_name) {
-		ctx.preferred_pack_idx = lookup_idx_or_pack_name(ctx.info,
-							     ctx.nr,
-							     preferred_pack_name);
-		if (ctx.preferred_pack_idx < 0)
+		int preferred_idx = lookup_idx_or_pack_name(ctx.info,
+							    ctx.nr,
+							    preferred_pack_name);
+		if (preferred_idx < 0)
 			warning(_("unknown preferred pack: '%s'"),
 				preferred_pack_name);
 		else {
-			uint32_t orig = ctx.info[ctx.preferred_pack_idx].orig_pack_int_id;
+			uint32_t orig = ctx.info[preferred_idx].orig_pack_int_id;
 			uint32_t perm = ctx.pack_perm[orig];

-			if (perm == PACK_EXPIRED) {
+			if (perm == PACK_EXPIRED)
 				warning(_("preferred pack '%s' is expired"),
 					preferred_pack_name);
-				ctx.preferred_pack_idx = -1;
-			} else
-				ctx.preferred_pack_idx = perm;
 		}
 	}
Jonathan Tan March 4, 2021, 2 a.m. UTC | #3
> Funny you mention that; I was wondering the same thing myself the other
> day when reading these patches again before deploying them to a couple
> of testing repositories at GitHub.
> 
> It is totally unnecessary: since we have already marked objects from the
> preferred pack in get_sorted_entries(), the rest of the code doesn't
> care if the preferred pack was permuted or not.
> 
> But we *do* care if the pack which was preferred expired. The 'git
> repack --geometric --write-midx' caller (which will appear in a later
> series) should never do that, so emitting a warning() is worthwhile.

Ah, this makes sense.

> I
> think ultimately you want something like this squashed in:
> 
> --- >8 ---
> 
> diff --git a/midx.c b/midx.c
> index d2c56c4bc6..46f55ff6cf 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -582,7 +582,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>  						  struct pack_info *info,
>  						  uint32_t nr_packs,
>  						  uint32_t *nr_objects,
> -						  uint32_t preferred_pack)
> +						  int preferred_pack)

Why this change?

>  {
>  	uint32_t cur_fanout, cur_pack, cur_object;
>  	uint32_t alloc_fanout, alloc_objects, total_objects = 0;
> @@ -869,6 +869,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
>  	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
>  		goto cleanup;
> 
> +	ctx.preferred_pack_idx = -1;
>  	if (preferred_pack_name) {
>  		for (i = 0; i < ctx.nr; i++) {
>  			if (!cmp_idx_or_pack_name(preferred_pack_name,
> @@ -877,8 +878,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
>  				break;
>  			}
>  		}
> -	} else
> -		ctx.preferred_pack_idx = -1;
> +	}
> 
>  	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
>  					 ctx.preferred_pack_idx);
> @@ -942,28 +942,21 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
>  			pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
>  	}
> 
> -	/*
> -	 * Recompute the preferred_pack_idx (if applicable) according to the
> -	 * permuted pack order.
> -	 */
> -	ctx.preferred_pack_idx = -1;
> +	/* Check that the preferred pack wasn't expired (if given). */
>  	if (preferred_pack_name) {
> -		ctx.preferred_pack_idx = lookup_idx_or_pack_name(ctx.info,
> -							     ctx.nr,
> -							     preferred_pack_name);
> -		if (ctx.preferred_pack_idx < 0)
> +		int preferred_idx = lookup_idx_or_pack_name(ctx.info,
> +							    ctx.nr,
> +							    preferred_pack_name);
> +		if (preferred_idx < 0)
>  			warning(_("unknown preferred pack: '%s'"),
>  				preferred_pack_name);
>  		else {
> -			uint32_t orig = ctx.info[ctx.preferred_pack_idx].orig_pack_int_id;
> +			uint32_t orig = ctx.info[preferred_idx].orig_pack_int_id;
>  			uint32_t perm = ctx.pack_perm[orig];
> 
> -			if (perm == PACK_EXPIRED) {
> +			if (perm == PACK_EXPIRED)
>  				warning(_("preferred pack '%s' is expired"),
>  					preferred_pack_name);
> -				ctx.preferred_pack_idx = -1;
> -			} else
> -				ctx.preferred_pack_idx = perm;
>  		}
>  	}

The rest makes sense.
>
Taylor Blau March 4, 2021, 3:04 a.m. UTC | #4
On Wed, Mar 03, 2021 at 06:00:17PM -0800, Jonathan Tan wrote:
> > I
> > think ultimately you want something like this squashed in:
> >
> > --- >8 ---
> >
> > diff --git a/midx.c b/midx.c
> > index d2c56c4bc6..46f55ff6cf 100644
> > --- a/midx.c
> > +++ b/midx.c
> > @@ -582,7 +582,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
> >  						  struct pack_info *info,
> >  						  uint32_t nr_packs,
> >  						  uint32_t *nr_objects,
> > -						  uint32_t preferred_pack)
> > +						  int preferred_pack)
>
> Why this change?

This was wrong in the original patch: ctx.preferred_pack is an integer,
and is set to -1 when no preferred pack was specified.

It's certainly unlikely that we'd have 2^31 packs, but silently
converting a signed type to an unsigned one is misleading.

> The rest makes sense.

Thanks for taking a look.

Taylor
diff mbox series

Patch

diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt
index eb0caa0439..ffd601bc17 100644
--- a/Documentation/git-multi-pack-index.txt
+++ b/Documentation/git-multi-pack-index.txt
@@ -9,7 +9,8 @@  git-multi-pack-index - Write and verify multi-pack-indexes
 SYNOPSIS
 --------
 [verse]
-'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress] <subcommand>
+'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress]
+	[--preferred-pack=<pack>] <subcommand>
 
 DESCRIPTION
 -----------
@@ -30,7 +31,16 @@  OPTIONS
 The following subcommands are available:
 
 write::
-	Write a new MIDX file.
+	Write a new MIDX file. The following options are available for
+	the `write` sub-command:
++
+--
+	--preferred-pack=<pack>::
+		Optionally specify the tie-breaking pack used when
+		multiple packs contain the same object. If not given,
+		ties are broken in favor of the pack with the lowest
+		mtime.
+--
 
 verify::
 	Verify the contents of the MIDX file.
diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt
index e8e377a59f..fb688976c4 100644
--- a/Documentation/technical/multi-pack-index.txt
+++ b/Documentation/technical/multi-pack-index.txt
@@ -43,8 +43,9 @@  Design Details
   a change in format.
 
 - The MIDX keeps only one record per object ID. If an object appears
-  in multiple packfiles, then the MIDX selects the copy in the most-
-  recently modified packfile.
+  in multiple packfiles, then the MIDX selects the copy in the
+  preferred packfile, otherwise selecting from the most-recently
+  modified packfile.
 
 - If there exist packfiles in the pack directory not registered in
   the MIDX, then those packfiles are loaded into the `packed_git`
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
index 5b05e5ce39..2329dc5ec0 100644
--- a/builtin/multi-pack-index.c
+++ b/builtin/multi-pack-index.c
@@ -4,10 +4,11 @@ 
 #include "parse-options.h"
 #include "midx.h"
 #include "trace2.h"
+#include "object-store.h"
 
 static char const * const builtin_multi_pack_index_write_usage[] = {
 #define BUILTIN_MIDX_WRITE_USAGE \
-	N_("git multi-pack-index [<options>] write")
+	N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]")
 	BUILTIN_MIDX_WRITE_USAGE,
 	NULL
 };
@@ -43,6 +44,7 @@  static char const * const builtin_multi_pack_index_usage[] = {
 
 static struct opts_multi_pack_index {
 	const char *object_dir;
+	const char *preferred_pack;
 	unsigned long batch_size;
 	unsigned flags;
 } opts;
@@ -63,7 +65,16 @@  static struct option *add_common_options(struct option *prev)
 
 static int cmd_multi_pack_index_write(int argc, const char **argv)
 {
-	struct option *options = common_opts;
+	struct option *options;
+	static struct option builtin_multi_pack_index_write_options[] = {
+		OPT_STRING(0, "preferred-pack", &opts.preferred_pack,
+			   N_("preferred-pack"),
+			   N_("pack for reuse when computing a multi-pack bitmap")),
+		OPT_END(),
+	};
+
+	options = parse_options_dup(builtin_multi_pack_index_write_options);
+	options = add_common_options(options);
 
 	trace2_cmd_mode(argv[0]);
 
@@ -74,7 +85,8 @@  static int cmd_multi_pack_index_write(int argc, const char **argv)
 		usage_with_options(builtin_multi_pack_index_write_usage,
 				   options);
 
-	return write_midx_file(opts.object_dir, opts.flags);
+	return write_midx_file(opts.object_dir, opts.preferred_pack,
+			       opts.flags);
 }
 
 static int cmd_multi_pack_index_verify(int argc, const char **argv)
diff --git a/builtin/repack.c b/builtin/repack.c
index 01440de2d5..9f00806805 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -523,7 +523,7 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 	remove_temporary_files();
 
 	if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
-		write_midx_file(get_object_directory(), 0);
+		write_midx_file(get_object_directory(), NULL, 0);
 
 	string_list_clear(&names, 0);
 	string_list_clear(&rollback, 0);
diff --git a/midx.c b/midx.c
index 971faa8cfc..d2c56c4bc6 100644
--- a/midx.c
+++ b/midx.c
@@ -431,6 +431,24 @@  static int pack_info_compare(const void *_a, const void *_b)
 	return strcmp(a->pack_name, b->pack_name);
 }
 
+static int lookup_idx_or_pack_name(struct pack_info *info,
+				   uint32_t nr,
+				   const char *pack_name)
+{
+	uint32_t lo = 0, hi = nr;
+	while (lo < hi) {
+		uint32_t mi = lo + (hi - lo) / 2;
+		int cmp = cmp_idx_or_pack_name(pack_name, info[mi].pack_name);
+		if (cmp < 0)
+			hi = mi;
+		else if (cmp > 0)
+			lo = mi + 1;
+		else
+			return mi;
+	}
+	return -1;
+}
+
 struct write_midx_context {
 	struct pack_info *info;
 	uint32_t nr;
@@ -445,6 +463,8 @@  struct write_midx_context {
 	uint32_t *pack_perm;
 	unsigned large_offsets_needed:1;
 	uint32_t num_large_offsets;
+
+	int preferred_pack_idx;
 };
 
 static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -489,6 +509,7 @@  struct pack_midx_entry {
 	uint32_t pack_int_id;
 	time_t pack_mtime;
 	uint64_t offset;
+	unsigned preferred : 1;
 };
 
 static int midx_oid_compare(const void *_a, const void *_b)
@@ -500,6 +521,12 @@  static int midx_oid_compare(const void *_a, const void *_b)
 	if (cmp)
 		return cmp;
 
+	/* Sort objects in a preferred pack first when multiple copies exist. */
+	if (a->preferred > b->preferred)
+		return -1;
+	if (a->preferred < b->preferred)
+		return 1;
+
 	if (a->pack_mtime > b->pack_mtime)
 		return -1;
 	else if (a->pack_mtime < b->pack_mtime)
@@ -527,7 +554,8 @@  static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
 static void fill_pack_entry(uint32_t pack_int_id,
 			    struct packed_git *p,
 			    uint32_t cur_object,
-			    struct pack_midx_entry *entry)
+			    struct pack_midx_entry *entry,
+			    int preferred)
 {
 	if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
 		die(_("failed to locate object %d in packfile"), cur_object);
@@ -536,6 +564,7 @@  static void fill_pack_entry(uint32_t pack_int_id,
 	entry->pack_mtime = p->mtime;
 
 	entry->offset = nth_packed_object_offset(p, cur_object);
+	entry->preferred = !!preferred;
 }
 
 /*
@@ -552,7 +581,8 @@  static void fill_pack_entry(uint32_t pack_int_id,
 static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 						  struct pack_info *info,
 						  uint32_t nr_packs,
-						  uint32_t *nr_objects)
+						  uint32_t *nr_objects,
+						  uint32_t preferred_pack)
 {
 	uint32_t cur_fanout, cur_pack, cur_object;
 	uint32_t alloc_fanout, alloc_objects, total_objects = 0;
@@ -589,12 +619,17 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 				nth_midxed_pack_midx_entry(m,
 							   &entries_by_fanout[nr_fanout],
 							   cur_object);
+				if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
+					entries_by_fanout[nr_fanout].preferred = 1;
+				else
+					entries_by_fanout[nr_fanout].preferred = 0;
 				nr_fanout++;
 			}
 		}
 
 		for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
 			uint32_t start = 0, end;
+			int preferred = cur_pack == preferred_pack;
 
 			if (cur_fanout)
 				start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
@@ -602,7 +637,11 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 
 			for (cur_object = start; cur_object < end; cur_object++) {
 				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
-				fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
+				fill_pack_entry(cur_pack,
+						info[cur_pack].p,
+						cur_object,
+						&entries_by_fanout[nr_fanout],
+						preferred);
 				nr_fanout++;
 			}
 		}
@@ -777,7 +816,9 @@  static int write_midx_large_offsets(struct hashfile *f,
 }
 
 static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
-			       struct string_list *packs_to_drop, unsigned flags)
+			       struct string_list *packs_to_drop,
+			       const char *preferred_pack_name,
+			       unsigned flags)
 {
 	char *midx_name;
 	uint32_t i;
@@ -828,7 +869,19 @@  static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
 		goto cleanup;
 
-	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr);
+	if (preferred_pack_name) {
+		for (i = 0; i < ctx.nr; i++) {
+			if (!cmp_idx_or_pack_name(preferred_pack_name,
+						  ctx.info[i].pack_name)) {
+				ctx.preferred_pack_idx = i;
+				break;
+			}
+		}
+	} else
+		ctx.preferred_pack_idx = -1;
+
+	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
+					 ctx.preferred_pack_idx);
 
 	ctx.large_offsets_needed = 0;
 	for (i = 0; i < ctx.entries_nr; i++) {
@@ -889,6 +942,31 @@  static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 			pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
 	}
 
+	/*
+	 * Recompute the preferred_pack_idx (if applicable) according to the
+	 * permuted pack order.
+	 */
+	ctx.preferred_pack_idx = -1;
+	if (preferred_pack_name) {
+		ctx.preferred_pack_idx = lookup_idx_or_pack_name(ctx.info,
+							     ctx.nr,
+							     preferred_pack_name);
+		if (ctx.preferred_pack_idx < 0)
+			warning(_("unknown preferred pack: '%s'"),
+				preferred_pack_name);
+		else {
+			uint32_t orig = ctx.info[ctx.preferred_pack_idx].orig_pack_int_id;
+			uint32_t perm = ctx.pack_perm[orig];
+
+			if (perm == PACK_EXPIRED) {
+				warning(_("preferred pack '%s' is expired"),
+					preferred_pack_name);
+				ctx.preferred_pack_idx = -1;
+			} else
+				ctx.preferred_pack_idx = perm;
+		}
+	}
+
 	if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
 		pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
 					(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
@@ -947,9 +1025,12 @@  static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 	return result;
 }
 
-int write_midx_file(const char *object_dir, unsigned flags)
+int write_midx_file(const char *object_dir,
+		    const char *preferred_pack_name,
+		    unsigned flags)
 {
-	return write_midx_internal(object_dir, NULL, NULL, flags);
+	return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
+				   flags);
 }
 
 void clear_midx_file(struct repository *r)
@@ -1184,7 +1265,7 @@  int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
 	free(count);
 
 	if (packs_to_drop.nr)
-		result = write_midx_internal(object_dir, m, &packs_to_drop, flags);
+		result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags);
 
 	string_list_clear(&packs_to_drop, 0);
 	return result;
@@ -1373,7 +1454,7 @@  int midx_repack(struct repository *r, const char *object_dir, size_t batch_size,
 		goto cleanup;
 	}
 
-	result = write_midx_internal(object_dir, m, NULL, flags);
+	result = write_midx_internal(object_dir, m, NULL, NULL, flags);
 	m = NULL;
 
 cleanup:
diff --git a/midx.h b/midx.h
index b18cf53bc4..e7fea61109 100644
--- a/midx.h
+++ b/midx.h
@@ -47,7 +47,7 @@  int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pa
 int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name);
 int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local);
 
-int write_midx_file(const char *object_dir, unsigned flags);
+int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags);
 void clear_midx_file(struct repository *r);
 int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags);
 int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags);
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index b4afab1dfc..fd94ba9053 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -31,6 +31,14 @@  midx_read_expect () {
 	test_cmp expect actual
 }
 
+midx_expect_object_offset () {
+	OID="$1"
+	OFFSET="$2"
+	OBJECT_DIR="$3"
+	test-tool read-midx --show-objects $OBJECT_DIR >actual &&
+	grep "^$OID $OFFSET" actual
+}
+
 test_expect_success 'setup' '
 	test_oid_cache <<-EOF
 	idxoff sha1:2999
@@ -234,6 +242,37 @@  test_expect_success 'warn on improper hash version' '
 	)
 '
 
+test_expect_success 'midx picks objects from preferred pack' '
+	test_when_finished rm -rf preferred.git &&
+	git init --bare preferred.git &&
+	(
+		cd preferred.git &&
+
+		a=$(echo "a" | git hash-object -w --stdin) &&
+		b=$(echo "b" | git hash-object -w --stdin) &&
+		c=$(echo "c" | git hash-object -w --stdin) &&
+
+		# Set up two packs, duplicating the object "B" at different
+		# offsets.
+		git pack-objects objects/pack/test-AB <<-EOF &&
+		$a
+		$b
+		EOF
+		bc=$(git pack-objects objects/pack/test-BC <<-EOF
+		$b
+		$c
+		EOF
+		) &&
+
+		git multi-pack-index --object-dir=objects \
+			write --preferred-pack=test-BC-$bc.idx 2>err &&
+		test_must_be_empty err &&
+
+		ofs=$(git show-index <objects/pack/test-BC-$bc.idx | grep $b |
+			cut -d" " -f1) &&
+		midx_expect_object_offset $b $ofs objects
+	)
+'
 
 test_expect_success 'verify multi-pack-index success' '
 	git multi-pack-index verify --object-dir=$objdir