@@ -80,7 +80,6 @@ struct survey_report_object_size_summary {
typedef int (*survey_top_size_cmp)(struct survey_report_object_size_summary *s1,
struct survey_report_object_size_summary *s2);
-MAYBE_UNUSED
static int cmp_by_nr(struct survey_report_object_size_summary *s1,
struct survey_report_object_size_summary *s2)
{
@@ -91,7 +90,6 @@ static int cmp_by_nr(struct survey_report_object_size_summary *s1,
return 0;
}
-MAYBE_UNUSED
static int cmp_by_disk_size(struct survey_report_object_size_summary *s1,
struct survey_report_object_size_summary *s2)
{
@@ -102,7 +100,6 @@ static int cmp_by_disk_size(struct survey_report_object_size_summary *s1,
return 0;
}
-MAYBE_UNUSED
static int cmp_by_inflated_size(struct survey_report_object_size_summary *s1,
struct survey_report_object_size_summary *s2)
{
@@ -126,7 +123,6 @@ struct survey_report_top_sizes {
size_t alloc;
};
-MAYBE_UNUSED
static void init_top_sizes(struct survey_report_top_sizes *top,
size_t limit, const char *name,
survey_top_size_cmp cmp)
@@ -146,7 +142,6 @@ static void clear_top_sizes(struct survey_report_top_sizes *top)
free(top->data);
}
-MAYBE_UNUSED
static void maybe_insert_into_top_size(struct survey_report_top_sizes *top,
struct survey_report_object_size_summary *summary)
{
@@ -182,6 +177,10 @@ struct survey_report {
struct survey_report_object_summary reachable_objects;
struct survey_report_object_size_summary *by_type;
+
+ struct survey_report_top_sizes *top_paths_by_count;
+ struct survey_report_top_sizes *top_paths_by_disk;
+ struct survey_report_top_sizes *top_paths_by_inflate;
};
#define REPORT_TYPE_COMMIT 0
@@ -423,6 +422,13 @@ static void survey_report_object_sizes(const char *title,
clear_table(&table);
}
+/*
+ * Emit one top-sizes list as a table in the plaintext report.
+ *
+ * The list's own name is passed as the first argument of
+ * survey_report_object_sizes(), the translated string "Path" as the
+ * second, followed by the list's entries and entry count.
+ */
+static void survey_report_plaintext_sorted_size(
+		struct survey_report_top_sizes *top)
+{
+	const char *title = top->name;
+	const char *column = _("Path");
+
+	survey_report_object_sizes(title, column, top->data, top->nr);
+}
+
static void survey_report_plaintext(struct survey_context *ctx)
{
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
@@ -433,6 +439,21 @@ static void survey_report_plaintext(struct survey_context *ctx)
_("Object Type"),
ctx->report.by_type,
REPORT_TYPE_COUNT);
+
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_count[REPORT_TYPE_TREE]);
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_count[REPORT_TYPE_BLOB]);
+
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_disk[REPORT_TYPE_TREE]);
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB]);
+
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE]);
+ survey_report_plaintext_sorted_size(
+ &ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB]);
}
static void survey_report_json(struct survey_context *ctx)
@@ -673,7 +694,8 @@ static void increment_totals(struct survey_context *ctx,
static void increment_object_totals(struct survey_context *ctx,
struct oid_array *oids,
- enum object_type type)
+ enum object_type type,
+ const char *path)
{
struct survey_report_object_size_summary *total;
struct survey_report_object_size_summary summary = { 0 };
@@ -701,6 +723,27 @@ static void increment_object_totals(struct survey_context *ctx,
total->disk_size += summary.disk_size;
total->inflated_size += summary.inflated_size;
total->num_missing += summary.num_missing;
+
+ if (type == OBJ_TREE || type == OBJ_BLOB) {
+ int index = type == OBJ_TREE ?
+ REPORT_TYPE_TREE : REPORT_TYPE_BLOB;
+ struct survey_report_top_sizes *top;
+
+ /*
+ * Temporarily store (const char *) here, but it will
+ * be duped if inserted and will not be freed.
+ */
+ summary.label = (char *)path;
+
+ top = ctx->report.top_paths_by_count;
+ maybe_insert_into_top_size(&top[index], &summary);
+
+ top = ctx->report.top_paths_by_disk;
+ maybe_insert_into_top_size(&top[index], &summary);
+
+ top = ctx->report.top_paths_by_inflate;
+ maybe_insert_into_top_size(&top[index], &summary);
+ }
}
static int survey_objects_path_walk_fn(const char *path,
@@ -712,7 +755,7 @@ static int survey_objects_path_walk_fn(const char *path,
increment_object_counts(&ctx->report.reachable_objects,
type, oids->nr);
- increment_object_totals(ctx, oids, type);
+ increment_object_totals(ctx, oids, type, path);
ctx->progress_nr += oids->nr;
display_progress(ctx->progress, ctx->progress_nr);
@@ -757,6 +800,34 @@ static int iterate_tag_chain(struct survey_context *ctx,
return -1;
}
+/*
+ * Allocate the per-type arrays that hang off ctx->report and fill in
+ * their fixed parts before any objects are counted.
+ *
+ * by_type gets a label for its commit, tree and blob entries.  Each of
+ * the three top_paths_* arrays gets REPORT_TYPE_COUNT entries, of which
+ * only the REPORT_TYPE_TREE and REPORT_TYPE_BLOB entries are handed to
+ * init_top_sizes() here, each with the same limit, its own title and
+ * the comparison function matching the array.
+ */
+static void initialize_report(struct survey_context *ctx)
+{
+	const int max_entries = 100; /* limit given to every init_top_sizes() call */
+	struct survey_report_top_sizes *top;
+
+	/* Per-object-type totals. */
+	CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
+	ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
+	ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
+	ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
+
+	/* Top paths by count, compared with cmp_by_nr. */
+	CALLOC_ARRAY(ctx->report.top_paths_by_count, REPORT_TYPE_COUNT);
+	top = ctx->report.top_paths_by_count;
+	init_top_sizes(&top[REPORT_TYPE_TREE], max_entries,
+		       _("TOP DIRECTORIES BY COUNT"), cmp_by_nr);
+	init_top_sizes(&top[REPORT_TYPE_BLOB], max_entries,
+		       _("TOP FILES BY COUNT"), cmp_by_nr);
+
+	/* Top paths by disk size, compared with cmp_by_disk_size. */
+	CALLOC_ARRAY(ctx->report.top_paths_by_disk, REPORT_TYPE_COUNT);
+	top = ctx->report.top_paths_by_disk;
+	init_top_sizes(&top[REPORT_TYPE_TREE], max_entries,
+		       _("TOP DIRECTORIES BY DISK SIZE"), cmp_by_disk_size);
+	init_top_sizes(&top[REPORT_TYPE_BLOB], max_entries,
+		       _("TOP FILES BY DISK SIZE"), cmp_by_disk_size);
+
+	/* Top paths by inflated size, compared with cmp_by_inflated_size. */
+	CALLOC_ARRAY(ctx->report.top_paths_by_inflate, REPORT_TYPE_COUNT);
+	top = ctx->report.top_paths_by_inflate;
+	init_top_sizes(&top[REPORT_TYPE_TREE], max_entries,
+		       _("TOP DIRECTORIES BY INFLATED SIZE"),
+		       cmp_by_inflated_size);
+	init_top_sizes(&top[REPORT_TYPE_BLOB], max_entries,
+		       _("TOP FILES BY INFLATED SIZE"),
+		       cmp_by_inflated_size);
+}
+
static void survey_phase_objects(struct survey_context *ctx)
{
struct rev_info revs = REV_INFO_INIT;
@@ -774,10 +845,7 @@ static void survey_phase_objects(struct survey_context *ctx)
info.blobs = 1;
info.tags = 1;
- CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
- ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
- ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
- ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
+ initialize_report(ctx);
repo_init_revisions(ctx->repo, &revs, "");
@@ -60,7 +60,17 @@ test_expect_success 'git survey (default)' '
Blobs | 10 | 191 | 101
EOF
- test_cmp expect out
+ lines=$(wc -l <expect) &&
+ head -n $lines out >out-trimmed &&
+ test_cmp expect out-trimmed &&
+
+ for type in "DIRECTORIES" "FILES"
+ do
+ for metric in "COUNT" "DISK SIZE" "INFLATED SIZE"
+ do
+ grep "TOP $type BY $metric" out || return 1
+ done || return 1
+ done
'
test_done