
revision: reallocate TOPO_WALK object flags

Message ID 29f8b1fc-fac7-12c6-4bfe-28aed7e709c3@web.de (mailing list archive)
State New, archived
Series: revision: reallocate TOPO_WALK object flags

Commit Message

René Scharfe June 24, 2020, 1:05 p.m. UTC
The bit fields in struct object have an unfortunate layout.  Here's what
pahole reports on x86_64 GNU/Linux:

struct object {
	unsigned int               parsed:1;             /*     0: 0  4 */
	unsigned int               type:3;               /*     0: 1  4 */

	/* XXX 28 bits hole, try to pack */

	/* Force alignment to the next boundary: */
	unsigned int               :0;

	unsigned int               flags:29;             /*     4: 0  4 */

	/* XXX 3 bits hole, try to pack */

	struct object_id           oid;                  /*     8    32 */

	/* size: 40, cachelines: 1, members: 4 */
	/* sum members: 32 */
	/* sum bitfield members: 33 bits, bit holes: 2, sum bit holes: 31 bits */
	/* last cacheline: 40 bytes */
};

Notice the 1+3+29=33 bits in bit fields and 28+3=31 bits in holes.
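
(Output like the above can typically be reproduced with "pahole -C
object git" on a binary built with debug info.)  To see the packing
effect in isolation, here is a minimal standalone sketch, not taken
from Git's sources: it mimics only the bit fields, and the sizes in
the comments assume the x86_64 System V ABI, since bit field layout
is implementation-defined.

#include <stdio.h>

/*
 * 1 + 3 + 29 = 33 bits: flags cannot share the first 32-bit unit with
 * parsed and type, so the compiler starts a second unit -- pahole
 * renders the break as a forced ":0" member -- and the bit fields
 * occupy 8 bytes instead of 4.
 */
struct wide {
	unsigned int parsed:1;
	unsigned int type:3;
	unsigned int flags:29;
};

/* 1 + 3 + 28 = 32 bits: everything fits in a single unsigned int. */
struct narrow {
	unsigned int parsed:1;
	unsigned int type:3;
	unsigned int flags:28;
};

int main(void)
{
	printf("wide:   %zu bytes\n", sizeof(struct wide));   /* 8 on x86_64 */
	printf("narrow: %zu bytes\n", sizeof(struct narrow)); /* 4 on x86_64 */
	return 0;
}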

There are holes inside the flags bit field as well -- while some object
flags are used for more than one purpose, 22, 23 and 24 are still free.
Use 23 and 24 instead of 27 and 28 for TOPO_WALK_EXPLORED and
TOPO_WALK_INDEGREE.  This allows us to reduce FLAG_BITS by one so that
all bitfields combined fit into a single 32-bit slot:

struct object {
	unsigned int               parsed:1;             /*     0: 0  4 */
	unsigned int               type:3;               /*     0: 1  4 */
	unsigned int               flags:28;             /*     0: 4  4 */
	struct object_id           oid;                  /*     4    32 */

	/* size: 36, cachelines: 1, members: 4 */
	/* last cacheline: 36 bytes */
};

With this tight packing the size of struct object is reduced by 10%.
Other architectures probably benefit as well.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 object.h   | 4 ++--
 revision.h | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

--
2.27.0

Comments

Derrick Stolee June 24, 2020, 1:51 p.m. UTC | #1
On 6/24/2020 9:05 AM, René Scharfe wrote:
> The bit fields in struct object have an unfortunate layout.  Here's what
> pahole reports on x86_64 GNU/Linux:
> 
> struct object {
> 	unsigned int               parsed:1;             /*     0: 0  4 */
> 	unsigned int               type:3;               /*     0: 1  4 */
> 
> 	/* XXX 28 bits hole, try to pack */
> 
> 	/* Force alignment to the next boundary: */
> 	unsigned int               :0;
> 
> 	unsigned int               flags:29;             /*     4: 0  4 */
> 
> 	/* XXX 3 bits hole, try to pack */
> 
> 	struct object_id           oid;                  /*     8    32 */
> 
> 	/* size: 40, cachelines: 1, members: 4 */
> 	/* sum members: 32 */
> 	/* sum bitfield members: 33 bits, bit holes: 2, sum bit holes: 31 bits */
> 	/* last cacheline: 40 bytes */
> };
> 
> Notice the 1+3+29=33 bits in bit fields and 28+3=31 bits in holes.

This is certainly unfortunate.

> There are holes inside the flags bit field as well -- while some object
> flags are used for more than one purpose, 22, 23 and 24 are still free.
> Use 23 and 24 instead of 27 and 28 for TOPO_WALK_EXPLORED and
> TOPO_WALK_INDEGREE.  This allows us to reduce FLAG_BITS by one so that
> all bitfields combined fit into a single 32-bit slot:
> 
> struct object {
> 	unsigned int               parsed:1;             /*     0: 0  4 */
> 	unsigned int               type:3;               /*     0: 1  4 */
> 	unsigned int               flags:28;             /*     0: 4  4 */
> 	struct object_id           oid;                  /*     4    32 */
> 
> 	/* size: 36, cachelines: 1, members: 4 */
> 	/* last cacheline: 36 bytes */
> };
> 
> With this tight packing the size of struct object is reduced by 10%.
> Other architectures probably benefit as well.

Excellent improvement!

> - * revision.h:               0---------10         15                   25----28
> + * revision.h:               0---------10         15             23------26
>   * fetch-pack.c:             01
>   * negotiator/default.c:       2--5
>   * walker.c:                 0-2
> @@ -79,7 +79,7 @@ struct object_array {
>   * builtin/show-branch.c:    0-------------------------------------------26

The only collision is here in builtin/show-branch.c, and when I added
the TOPO_WALK_* bits, I didn't realize that show-branch's use of these
bits is sufficiently independent from the topo-walk logic that letting
them collide is safe.

I think I realized this while doing the commit-reach.c refactor,
but I never revisited these flag values.

>   * builtin/unpack-objects.c:                                 2021
>   */
> -#define FLAG_BITS  29
> +#define FLAG_BITS  28
> 
>  /*
>   * The object type is stored in 3 bits.
> diff --git a/revision.h b/revision.h
> index 93491b79d4..f412ae85eb 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -37,6 +37,10 @@
> 
>  /* WARNING: This is also used as REACHABLE in commit-graph.c. */
>  #define PULL_MERGE	(1u<<15)
> +
> +#define TOPO_WALK_EXPLORED	(1u<<23)
> +#define TOPO_WALK_INDEGREE	(1u<<24)
> +
>  /*
>   * Indicates object was reached by traversal. i.e. not given by user on
>   * command-line or stdin.
> @@ -48,9 +52,6 @@
>  #define TRACK_LINEAR	(1u<<26)
>  #define ALL_REV_FLAGS	(((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR | PULL_MERGE)
> 
> -#define TOPO_WALK_EXPLORED	(1u<<27)
> -#define TOPO_WALK_INDEGREE	(1u<<28)
> -
>  #define DECORATE_SHORT_REFS	1
>  #define DECORATE_FULL_REFS	2

Thankfully, the changes are very simple!

Thanks,
-Stolee
Junio C Hamano June 24, 2020, 4:06 p.m. UTC | #2
Derrick Stolee <stolee@gmail.com> writes:

>> @@ -79,7 +79,7 @@ struct object_array {
>>   * builtin/show-branch.c:    0-------------------------------------------26
>
> The only collision is here in builtin/show-branch.c, and when I added
> the TOPO_WALK_* bits, I didn't understand that these bits were
> sufficiently independent from the topo-walk logic that we could add
> collisions here.

The show-branch subcommand does its own walking without using any of
the usual revision machinery, and it uses one bit per starting point
(which limits the number of starting points to the word size), so the
collision should be safe.
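
To illustrate that bit-per-head scheme in a self-contained way -- this
is a toy, not show-branch's actual code, and it uses a single-parent
chain where real commits can have several parents -- each head owns one
bit in a per-node flag word, so marking reachability is a simple OR and
the head count is capped by the width of that word:

#include <stdio.h>

#define NNODES 6

static unsigned int flags[NNODES];
static const int parent[NNODES] = { -1, 0, 0, 1, 2, 3 }; /* -1: root */

/* Mark everything reachable from `node` with head `head`'s private bit;
 * stop early once we hit a node that already carries the bit. */
static void mark_reachable(int node, int head)
{
	unsigned int bit = 1u << head;
	while (node >= 0 && !(flags[node] & bit)) {
		flags[node] |= bit;
		node = parent[node];
	}
}

int main(void)
{
	mark_reachable(4, 0);	/* head 0 starts at node 4 */
	mark_reachable(5, 1);	/* head 1 starts at node 5 */
	for (int i = 0; i < NNODES; i++)
		printf("node %d: reachable from heads %#x\n", i, flags[i]);
	return 0;
}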

It would be wonderful if the bits it uses could be moved to a slab
so that it could work from more starting points, or if the subcommand
were deprecated altogether.  With better visualization tools (including
"log --graph --oneline"), I suspect it has outlived its usefulness.
Jeff King June 24, 2020, 7:39 p.m. UTC | #3
On Wed, Jun 24, 2020 at 03:05:38PM +0200, René Scharfe wrote:

> The bit fields in struct object have an unfortunate layout.  Here's what
> pahole reports on x86_64 GNU/Linux:
> 
> struct object {
> 	unsigned int               parsed:1;             /*     0: 0  4 */
> 	unsigned int               type:3;               /*     0: 1  4 */
> 
> 	/* XXX 28 bits hole, try to pack */
> 
> 	/* Force alignment to the next boundary: */
> 	unsigned int               :0;
> 
> 	unsigned int               flags:29;             /*     4: 0  4 */
> 
> 	/* XXX 3 bits hole, try to pack */
> 
> 	struct object_id           oid;                  /*     8    32 */
> 
> 	/* size: 40, cachelines: 1, members: 4 */
> 	/* sum members: 32 */
> 	/* sum bitfield members: 33 bits, bit holes: 2, sum bit holes: 31 bits */
> 	/* last cacheline: 40 bytes */
> };
> 
> Notice the 1+3+29=33 bits in bit fields and 28+3=31 bits in holes.

Good catch. The original FLAG_BITS was intended to pack with parsed and
type, and we should have caught this in review when it got bumped in
b45424181e (revision.c: generation-based topo-order algorithm,
2018-11-01).

The patch looks correct to me.

> There are holes inside the flags bit field as well -- while some object
> flags are used for more than one purpose, 22, 23 and 24 are still free.
> Use 23 and 24 instead of 27 and 28 for TOPO_WALK_EXPLORED and
> TOPO_WALK_INDEGREE.  This allows us to reduce FLAG_BITS by one so that
> all bitfields combined fit into a single 32-bit slot:
> 
> struct object {
> 	unsigned int               parsed:1;             /*     0: 0  4 */
> 	unsigned int               type:3;               /*     0: 1  4 */
> 	unsigned int               flags:28;             /*     0: 4  4 */
> 	struct object_id           oid;                  /*     4    32 */
> 
> 	/* size: 36, cachelines: 1, members: 4 */
> 	/* last cacheline: 36 bytes */
> };

With 20-byte hashes, this put us at 24 bytes, which is 8-byte aligned. I
had always assumed once we moved to 32-byte hashes that we'd be stuck
with a 40-byte "struct object" to keep 8-byte alignment on 64-bit
systems. But it seems that at least on x86_64 Linux, we're happy with
4-byte alignment. That's useful to know (I had wondered if we might be
able to put those extra bytes to good use, but I guess not).
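
That alignment behavior is easy to check: at this point in history
struct object_id is just an array of unsigned char, whose alignment is
1, so the alignment of struct object comes from its unsigned int bit
fields.  A standalone mock of the layout, not using Git's headers, with
the 32-byte SHA-256 GIT_MAX_RAWSZ assumed:

#include <stdio.h>

#define GIT_MAX_RAWSZ 32	/* SHA-256 */

struct mock_object_id {
	unsigned char hash[GIT_MAX_RAWSZ];
};

struct mock_object {
	unsigned int parsed:1;
	unsigned int type:3;
	unsigned int flags:28;
	struct mock_object_id oid;
};

int main(void)
{
	/* char arrays need only alignment 1, so the bit fields dominate:
	 * the struct is 4-byte aligned, not 8, and has no tail padding. */
	printf("alignof(oid)    = %zu\n", _Alignof(struct mock_object_id)); /* 1 */
	printf("alignof(object) = %zu\n", _Alignof(struct mock_object));    /* 4 */
	printf("sizeof(object)  = %zu\n", sizeof(struct mock_object));      /* 36 */
	return 0;
}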

>  /*
>   * object flag allocation:
> - * revision.h:               0---------10         15                   25----28
> + * revision.h:               0---------10         15             23------26
>   * fetch-pack.c:             01
>   * negotiator/default.c:       2--5
>   * walker.c:                 0-2

Definitely not a new problem, but I think we should consider "rotating"
this table. The point of it is for two branches that get merged to cause
a conflict if they allocate the same bit to two uses. And we _might_ get
see such a conflict if the allocations are on adjacent lines, but we
wouldn't if they're far away. E.g., imagine one side does:

  -* revision.h    0---------10
  +* revision.h    0----------11
   * fetch-pack.c  01
   * foo.c           2
   * bar.c            3

and the other does:

   * revision.h    0---------10
   * fetch-pack.c  01
   * foo.c           2
   * bar.c            3
  +* uh-oh.c                  11

Now we have two possibly conflicting uses of bit 11 (a semantic
conflict), but no matching textual conflict.

Whereas if we wrote:

  * bit 0: revision.h, fetch-pack.c
  * bit 1: revision.h
  * bit 2: revision.h, foo.c
  * bit 3: revision.h, bar.c
  [etc...]

then we'd get a conflict on the "bit 11" line. It does mean that
unrelated modules get written on the same line, but that's the point.
They're only allowed to be on the same line after somebody determines
that they're mutually exclusive and won't stomp on each other.

-Peff

Patch

diff --git a/object.h b/object.h
index a496d2e4e1..b207f550b2 100644
--- a/object.h
+++ b/object.h
@@ -59,7 +59,7 @@  struct object_array {

 /*
  * object flag allocation:
- * revision.h:               0---------10         15                   25----28
+ * revision.h:               0---------10         15             23------26
  * fetch-pack.c:             01
  * negotiator/default.c:       2--5
  * walker.c:                 0-2
@@ -79,7 +79,7 @@  struct object_array {
  * builtin/show-branch.c:    0-------------------------------------------26
  * builtin/unpack-objects.c:                                 2021
  */
-#define FLAG_BITS  29
+#define FLAG_BITS  28

 /*
  * The object type is stored in 3 bits.
diff --git a/revision.h b/revision.h
index 93491b79d4..f412ae85eb 100644
--- a/revision.h
+++ b/revision.h
@@ -37,6 +37,10 @@ 

 /* WARNING: This is also used as REACHABLE in commit-graph.c. */
 #define PULL_MERGE	(1u<<15)
+
+#define TOPO_WALK_EXPLORED	(1u<<23)
+#define TOPO_WALK_INDEGREE	(1u<<24)
+
 /*
  * Indicates object was reached by traversal. i.e. not given by user on
  * command-line or stdin.
@@ -48,9 +52,6 @@ 
 #define TRACK_LINEAR	(1u<<26)
 #define ALL_REV_FLAGS	(((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR | PULL_MERGE)

-#define TOPO_WALK_EXPLORED	(1u<<27)
-#define TOPO_WALK_INDEGREE	(1u<<28)
-
 #define DECORATE_SHORT_REFS	1
 #define DECORATE_FULL_REFS	2