diff mbox series

[5/5] t-reftable-readwrite: add tests for print functions

Message ID 20240807141608.4524-6-chandrapratap3519@gmail.com (mailing list archive)
State Superseded
Headers show
Series t: port reftable/readwrite_test.c to the unit testing framework | expand

Commit Message

Chandra Pratap Aug. 7, 2024, 2:12 p.m. UTC
reftable/reftable-reader.h lists two print functions useful in
debugging, reftable_reader_print_file() and
reftable_reader_print_blocks(). As of now, both these functions
are left unexercised by all of the reftable tests. Add a test
function to exercise both these functions. This has the added
benefit of testing reftable_block_source_from_file(), which
currently remains untested as well.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Chandra Pratap <chandrapratap3519@gmail.com>
---
 t/unit-tests/t-reftable-readwrite.c | 75 +++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

Comments

Patrick Steinhardt Aug. 8, 2024, 8:12 a.m. UTC | #1
On Wed, Aug 07, 2024 at 07:42:01PM +0530, Chandra Pratap wrote:
> +static void t_table_print(void)
> +{
> +	char name[100];
> +	struct reftable_write_options opts = {
> +		.block_size = 512,
> +		.hash_id = GIT_SHA1_FORMAT_ID,
> +	};
> +	struct reftable_ref_record ref = { 0 };
> +	struct reftable_log_record log = { 0 };
> +	struct reftable_writer *w = NULL;
> +	struct tempfile *tmp = NULL;
> +	size_t i, N = 3;
> +	int n, fd;
> +
> +	xsnprintf(name, sizeof(name), "t-reftable-readwrite-%d-XXXXXX", __LINE__);

Is it really required to include the line number in this file? This
feels unnecessarily defensive to me as `mks_tempfile_t()` should already
make sure that we get a unique filename. So if we drop that, we could
skip this call to `xsnprintf()`.

> +	tmp = mks_tempfile_t(name);
> +	fd = get_tempfile_fd(tmp);
> +	w = reftable_new_writer(&fd_write, &fd_flush, &fd, &opts);
> +	reftable_writer_set_limits(w, 0, update_index);
> +
> +	for (i = 0; i < N; i++) {
> +		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> +		ref.refname = name;
> +		ref.update_index = i;
> +		ref.value_type = REFTABLE_REF_VAL1;
> +		set_test_hash(ref.value.val1, i);
> +
> +		n = reftable_writer_add_ref(w, &ref);
> +		check_int(n, ==, 0);
> +	}
> +
> +	for (i = 0; i < N; i++) {
> +		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> +		log.refname = name;
> +		log.update_index = i;
> +		log.value_type = REFTABLE_LOG_UPDATE;
> +		set_test_hash(log.value.update.new_hash, i);
> +		log.value.update.name = (char *) "John Doe";
> +		log.value.update.email = (char *) "johndoe@anon.org";
> +		log.value.update.time = 0x6673e5b9;
> +		log.value.update.message = (char *) "message";
> +
> +		n = reftable_writer_add_log(w, &log);
> +		check_int(n, ==, 0);
> +	}
> +
> +	n = reftable_writer_close(w);
> +	check_int(n, ==, 0);
> +
> +	test_msg("testing printing functionality:");

Is it intentional that this line still exists? If so, I think it
really only causes unnecessary noise and should rather be dropped.

> +	n = reftable_reader_print_file(tmp->filename.buf);
> +	check_int(n, ==, 0);

Wait, doesn't this print to stdout? I don't think it is a good idea to
exercise the function as-is. For one, it would pollute stdout with data
that we shouldn't care about. Second, it doesn't verify that the result
is actually what we expect.

I can see two options:

  1. Refactor these interfaces such that they take a file descriptor as
     input that they are writing to. This would allow us to exercise
     that the output is correct.

  2. Rip out this function. I don't think this functionality should be
     part of the library in the first place, and it really only exists
     because of "reftable/dump.c".

I think the latter is the better option. The functionality exists to
drive `cmd__dump_reftable()` in our reftable test helper. We should
likely make the whole implementation of this an internal implementation
detail and not expose it.

Patrick
Patrick Steinhardt Aug. 8, 2024, noon UTC | #2
On Thu, Aug 08, 2024 at 10:12:07AM +0200, Patrick Steinhardt wrote:
> On Wed, Aug 07, 2024 at 07:42:01PM +0530, Chandra Pratap wrote:
> > +static void t_table_print(void)
> > +{
> > +	char name[100];
> > +	struct reftable_write_options opts = {
> > +		.block_size = 512,
> > +		.hash_id = GIT_SHA1_FORMAT_ID,
> > +	};
> > +	struct reftable_ref_record ref = { 0 };
> > +	struct reftable_log_record log = { 0 };
> > +	struct reftable_writer *w = NULL;
> > +	struct tempfile *tmp = NULL;
> > +	size_t i, N = 3;
> > +	int n, fd;
> > +
> > +	xsnprintf(name, sizeof(name), "t-reftable-readwrite-%d-XXXXXX", __LINE__);
> 
> Is it really required to include the line number in this file? This
> feels unnecessarily defensive to me as `mks_tempfile_t()` should already
> make sure that we get a unique filename. So if we drop that, we could
> skip this call to `xsnprintf()`.
> 
> > +	tmp = mks_tempfile_t(name);
> > +	fd = get_tempfile_fd(tmp);
> > +	w = reftable_new_writer(&fd_write, &fd_flush, &fd, &opts);
> > +	reftable_writer_set_limits(w, 0, update_index);
> > +
> > +	for (i = 0; i < N; i++) {
> > +		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> > +		ref.refname = name;
> > +		ref.update_index = i;
> > +		ref.value_type = REFTABLE_REF_VAL1;
> > +		set_test_hash(ref.value.val1, i);
> > +
> > +		n = reftable_writer_add_ref(w, &ref);
> > +		check_int(n, ==, 0);
> > +	}
> > +
> > +	for (i = 0; i < N; i++) {
> > +		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> > +		log.refname = name;
> > +		log.update_index = i;
> > +		log.value_type = REFTABLE_LOG_UPDATE;
> > +		set_test_hash(log.value.update.new_hash, i);
> > +		log.value.update.name = (char *) "John Doe";
> > +		log.value.update.email = (char *) "johndoe@anon.org";
> > +		log.value.update.time = 0x6673e5b9;
> > +		log.value.update.message = (char *) "message";
> > +
> > +		n = reftable_writer_add_log(w, &log);
> > +		check_int(n, ==, 0);
> > +	}
> > +
> > +	n = reftable_writer_close(w);
> > +	check_int(n, ==, 0);
> > +
> > +	test_msg("testing printing functionality:");
> 
> Is it intentional that this line still exists? If so, I think it
> really only causes unnecessary noise and should rather be dropped.
> 
> > +	n = reftable_reader_print_file(tmp->filename.buf);
> > +	check_int(n, ==, 0);
> 
> Wait, doesn't this print to stdout? I don't think it is a good idea to
> exercise the function as-is. For one, it would pollute stdout with data
> that we shouldn't care about. Second, it doesn't verify that the result
> is actually what we expect.
> 
> I can see two options:
> 
>   1. Refactor these interfaces such that they take a file descriptor as
>      input that they are writing to. This would allow us to exercise
>      that the output is correct.
> 
>   2. Rip out this function. I don't think this functionality should be
>      part of the library in the first place, and it really only exists
>      because of "reftable/dump.c".
> 
> I think the latter is the better option. The functionality exists to
> drive `cmd__dump_reftable()` in our reftable test helper. We should
> likely make the whole implementation of this an internal implementation
> detail and not expose it.

For the record: I've got a bigger patch series in development that drops
the generic reftable interfaces. As part of this, I'll also rip out the
functionality provided by "reftable/dump.c".

Patrick
Chandra Pratap Aug. 8, 2024, 2:25 p.m. UTC | #3
On Thu, 8 Aug 2024 at 17:36, Patrick Steinhardt <ps@pks.im> wrote:
>
> On Thu, Aug 08, 2024 at 10:12:07AM +0200, Patrick Steinhardt wrote:
> > On Wed, Aug 07, 2024 at 07:42:01PM +0530, Chandra Pratap wrote:
> > > +static void t_table_print(void)
> > > +{
> > > +   char name[100];
> > > +   struct reftable_write_options opts = {
> > > +           .block_size = 512,
> > > +           .hash_id = GIT_SHA1_FORMAT_ID,
> > > +   };
> > > +   struct reftable_ref_record ref = { 0 };
> > > +   struct reftable_log_record log = { 0 };
> > > +   struct reftable_writer *w = NULL;
> > > +   struct tempfile *tmp = NULL;
> > > +   size_t i, N = 3;
> > > +   int n, fd;
> > > +
> > > +   xsnprintf(name, sizeof(name), "t-reftable-readwrite-%d-XXXXXX", __LINE__);
> >
> > Is it really required to include the line number in this file? This
> > feels unnecessarily defensive to me as `mks_tempfile_t()` should already
> > make sure that we get a unique filename. So if we drop that, we could
> > skip this call to `xsnprintf()`.
> >
> > > +   tmp = mks_tempfile_t(name);
> > > +   fd = get_tempfile_fd(tmp);
> > > +   w = reftable_new_writer(&fd_write, &fd_flush, &fd, &opts);
> > > +   reftable_writer_set_limits(w, 0, update_index);
> > > +
> > > +   for (i = 0; i < N; i++) {
> > > +           xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> > > +           ref.refname = name;
> > > +           ref.update_index = i;
> > > +           ref.value_type = REFTABLE_REF_VAL1;
> > > +           set_test_hash(ref.value.val1, i);
> > > +
> > > +           n = reftable_writer_add_ref(w, &ref);
> > > +           check_int(n, ==, 0);
> > > +   }
> > > +
> > > +   for (i = 0; i < N; i++) {
> > > +           xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
> > > +           log.refname = name;
> > > +           log.update_index = i;
> > > +           log.value_type = REFTABLE_LOG_UPDATE;
> > > +           set_test_hash(log.value.update.new_hash, i);
> > > +           log.value.update.name = (char *) "John Doe";
> > > +           log.value.update.email = (char *) "johndoe@anon.org";
> > > +           log.value.update.time = 0x6673e5b9;
> > > +           log.value.update.message = (char *) "message";
> > > +
> > > +           n = reftable_writer_add_log(w, &log);
> > > +           check_int(n, ==, 0);
> > > +   }
> > > +
> > > +   n = reftable_writer_close(w);
> > > +   check_int(n, ==, 0);
> > > +
> > > +   test_msg("testing printing functionality:");
> >
> > Is it intentional that this line still exists? If so, I think it
> > really only causes unnecessary noise and should rather be dropped.
> >
> > > +   n = reftable_reader_print_file(tmp->filename.buf);
> > > +   check_int(n, ==, 0);
> >
> > Wait, doesn't this print to stdout? I don't think it is a good idea to
> > exercise the function as-is. For one, it would pollute stdout with data
> > that we shouldn't care about. Second, it doesn't verify that the result
> > is actually what we expect.
> >
> > I can see two options:
> >
> >   1. Refactor these interfaces such that they take a file descriptor as
> >      input that they are writing to. This would allow us to exercise
> >      that the output is correct.
> >
> >   2. Rip out this function. I don't think this functionality should be
> >      part of the library in the first place, and it really only exists
> >      because of "reftable/dump.c".
> >
> > I think the latter is the better option. The functionality exists to
> > drive `cmd__dump_reftable()` in our reftable test helper. We should
> > likely make the whole implementation of this an internal implementation
> > detail and not expose it.
>
> For the record: I've got a bigger patch series in development that drops
> the generic reftable interfaces. As part of this, I'll also rip out the
> functionality provided by "reftable/dump.c".

Cool, I'll just drop this patch from the series then.
Junio C Hamano Aug. 9, 2024, 4:56 p.m. UTC | #4
Patrick Steinhardt <ps@pks.im> writes:

> I can see two options:
>
>   1. Refactor these interfaces such that they take a file descriptor as
>      input that they are writing to. This would allow us to exercise
>      that the output is correct.
>
>   2. Rip out this function. I don't think this functionality should be
>      part of the library in the first place, and it really only exists
>      because of "reftable/dump.c".
>
> I think the latter is the better option. The functionality exists to
> drive `cmd__dump_reftable()` in our reftable test helper. We should
> likely make the whole implementation of this an internal implementation
> detail and not expose it.

Thanks for a review.  Is there anything other than removing this
step that this series needs?
diff mbox series

Patch

diff --git a/t/unit-tests/t-reftable-readwrite.c b/t/unit-tests/t-reftable-readwrite.c
index a5462441d3..8c6f2f1f5d 100644
--- a/t/unit-tests/t-reftable-readwrite.c
+++ b/t/unit-tests/t-reftable-readwrite.c
@@ -11,6 +11,8 @@  license that can be found in the LICENSE file or at
 #include "reftable/blocksource.h"
 #include "reftable/reftable-error.h"
 #include "reftable/reftable-writer.h"
+#include "tempfile.h"
+#include "write-or-die.h"
 
 static const int update_index = 5;
 
@@ -25,11 +27,23 @@  static ssize_t strbuf_add_void(void *b, const void *data, size_t sz)
 	return sz;
 }
 
+static ssize_t fd_write(void *b, const void *data, size_t sz)
+{
+	int *fdp = (int *)b;
+	return write_in_full(*fdp, data, sz);
+}
+
 static int noop_flush(void *arg)
 {
 	return 0;
 }
 
+static int fd_flush(void *arg)
+{
+	int *fdp = (int *)arg;
+	return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp);
+}
+
 static void t_buffer(void)
 {
 	struct strbuf buf = STRBUF_INIT;
@@ -944,6 +958,66 @@  static void t_corrupt_table(void)
 	strbuf_release(&buf);
 }
 
+static void t_table_print(void)
+{
+	char name[100];
+	struct reftable_write_options opts = {
+		.block_size = 512,
+		.hash_id = GIT_SHA1_FORMAT_ID,
+	};
+	struct reftable_ref_record ref = { 0 };
+	struct reftable_log_record log = { 0 };
+	struct reftable_writer *w = NULL;
+	struct tempfile *tmp = NULL;
+	size_t i, N = 3;
+	int n, fd;
+
+	xsnprintf(name, sizeof(name), "t-reftable-readwrite-%d-XXXXXX", __LINE__);
+	tmp = mks_tempfile_t(name);
+	fd = get_tempfile_fd(tmp);
+	w = reftable_new_writer(&fd_write, &fd_flush, &fd, &opts);
+	reftable_writer_set_limits(w, 0, update_index);
+
+	for (i = 0; i < N; i++) {
+		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
+		ref.refname = name;
+		ref.update_index = i;
+		ref.value_type = REFTABLE_REF_VAL1;
+		set_test_hash(ref.value.val1, i);
+
+		n = reftable_writer_add_ref(w, &ref);
+		check_int(n, ==, 0);
+	}
+
+	for (i = 0; i < N; i++) {
+		xsnprintf(name, sizeof(name), "refs/heads/branch%02"PRIuMAX, (uintmax_t)i);
+		log.refname = name;
+		log.update_index = i;
+		log.value_type = REFTABLE_LOG_UPDATE;
+		set_test_hash(log.value.update.new_hash, i);
+		log.value.update.name = (char *) "John Doe";
+		log.value.update.email = (char *) "johndoe@anon.org";
+		log.value.update.time = 0x6673e5b9;
+		log.value.update.message = (char *) "message";
+
+		n = reftable_writer_add_log(w, &log);
+		check_int(n, ==, 0);
+	}
+
+	n = reftable_writer_close(w);
+	check_int(n, ==, 0);
+
+	test_msg("testing printing functionality:");
+	n = reftable_reader_print_file(tmp->filename.buf);
+	check_int(n, ==, 0);
+	n = reftable_reader_print_blocks(tmp->filename.buf);
+	/* end of blocks is denoted by a return value of 1 */
+	check_int(n, ==, 1);
+
+	delete_tempfile(&tmp);
+	reftable_writer_free(w);
+}
+
 int cmd_main(int argc, const char *argv[])
 {
 	TEST(t_buffer(), "strbuf works as blocksource");
@@ -953,6 +1027,7 @@  int cmd_main(int argc, const char *argv[])
 	TEST(t_log_overflow(), "log overflow returns expected error");
 	TEST(t_log_write_read(), "read-write on log records");
 	TEST(t_log_zlib_corruption(), "reading corrupted log record returns expected error");
+	TEST(t_table_print(), "print tables and blocks");
 	TEST(t_table_read_api(), "read on a table");
 	TEST(t_table_read_write_seek_index(), "read-write on a table with index");
 	TEST(t_table_read_write_seek_linear(), "read-write on a table without index (SHA1)");