diff mbox series

[bpf-next,v3,1/5] selftests/bpf: specify expected instructions in test_verifier tests

Message ID 20220603141047.2163170-2-eddyz87@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf_loop inlining | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 8 maintainers not CCed: netdev@vger.kernel.org songliubraving@fb.com linux-kselftest@vger.kernel.org yhs@fb.com john.fastabend@gmail.com kafai@fb.com shuah@kernel.org kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on ubuntu-latest with llvm-15

Commit Message

Eduard Zingerman June 3, 2022, 2:10 p.m. UTC
Allows to specify expected and unexpected instruction sequences in
test_verifier test cases. The instructions are requested from kernel
after BPF program loading, thus allowing to check some of the
transformations applied by BPF verifier.

- `expected_insn` field specifies a sequence of instructions expected
  to be found in the program;
- `unexpected_insn` field specifies a sequence of instructions that
  are not expected to be found in the program;
- `INSN_OFF_MASK` and `INSN_IMM_MASK` values could be used to mask
  `off` and `imm` fields.
- `SKIP_INSNS` could be used to specify that some instructions in the
  (un)expected pattern are not important (behavior similar to usage of
  `\t` in `errstr` field).

The intended usage is as follows:

  {
	"inline simple bpf_loop call",
	.insns = {
	/* main */
	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2,
			BPF_PSEUDO_FUNC, 0, 6),
    ...
	BPF_EXIT_INSN(),
	/* callback */
	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
	BPF_EXIT_INSN(),
	},
	.expected_insns = {
	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
	SKIP_INSNS(),
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 8, 1)
	},
	.unexpected_insns = {
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0,
			INSN_OFF_MASK, INSN_IMM_MASK),
	},
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
	.result = ACCEPT,
	.runs = 0,
  },

Here it is expected that move of 1 to register 1 would remain in place
and helper function call instruction would be replaced by a relative
call instruction.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 222 ++++++++++++++++++++
 1 file changed, 222 insertions(+)

Comments

Song Liu June 3, 2022, 9:31 p.m. UTC | #1
On Fri, Jun 3, 2022 at 7:11 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
[...]

> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  tools/testing/selftests/bpf/test_verifier.c | 222 ++++++++++++++++++++
>  1 file changed, 222 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> index 372579c9f45e..373f7661f4d0 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -51,6 +51,8 @@
>  #endif
>
>  #define MAX_INSNS      BPF_MAXINSNS
> +#define MAX_EXPECTED_INSNS     32
> +#define MAX_UNEXPECTED_INSNS   32
>  #define MAX_TEST_INSNS 1000000
>  #define MAX_FIXUPS     8
>  #define MAX_NR_MAPS    23
> @@ -58,6 +60,10 @@
>  #define POINTER_VALUE  0xcafe4all
>  #define TEST_DATA_LEN  64
>
> +#define INSN_OFF_MASK  ((s16)0xFFFF)
> +#define INSN_IMM_MASK  ((s32)0xFFFFFFFF)

Shall we use __s16 and __s32 to match struct bpf_insn exactly.

> +#define SKIP_INSNS()   BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
> +
>  #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
>  #define F_LOAD_WITH_STRICT_ALIGNMENT           (1 << 1)
>
> @@ -79,6 +85,19 @@ struct bpf_test {
>         const char *descr;
>         struct bpf_insn insns[MAX_INSNS];
>         struct bpf_insn *fill_insns;
> +       /* If specified, test engine looks for this sequence of
> +        * instructions in the BPF program after loading. Allows to
> +        * test rewrites applied by verifier.  Use values
> +        * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm`
> +        * fields if content does not matter.  The test case fails if
> +        * specified instructions are not found.
> +        *
> +        * The sequence could be split into sub-sequences by adding
> +        * SKIP_INSNS instruction at the end of each sub-sequence. In
> +        * such case sub-sequences are searched for one after another.
> +        */
> +       struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
> +       struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
>         int fixup_map_hash_8b[MAX_FIXUPS];
>         int fixup_map_hash_48b[MAX_FIXUPS];
>         int fixup_map_hash_16b[MAX_FIXUPS];
> @@ -1126,6 +1145,206 @@ static bool cmp_str_seq(const char *log, const char *exp)
>         return true;
>  }
>
> +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
> +{
> +       struct bpf_prog_info info = {};
> +       __u32 info_len = sizeof(info);
> +       __u32 xlated_prog_len;
> +       __u32 buf_elt_size = sizeof(**buf);

I guess elt means "element"? I would recommend use sizeof(struct bpf_insn)
directly.

[...]

> +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
> +{
> +       for (int i = 0; i < max_len; ++i) {

Sorry for missing this in v1. We should really pull variable
declaration out, like

int i;

for (int i = 0; ...)

> +               if (is_null_insn(&seq[i]))
> +                       return i;
> +       }
> +       return max_len;
> +}
> +
[...]

> +
> +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
> +                           int seq_len, int subseq_len)
> +{
> +       if (subseq_len > seq_len)
> +               return -1;
> +
> +       for (int i = 0; i < seq_len - subseq_len + 1; ++i) {
> +               bool found = true;
> +
> +               for (int j = 0; j < subseq_len; ++j) {
> +                       if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
> +                               found = false;
> +                               break;
> +                       }
> +               }
> +               if (found)
> +                       return i;
> +       }
> +
> +       return -1;
> +}
> +

[...]

> +
> +static bool check_xlated_program(struct bpf_test *test, int fd_prog)
> +{
> +       struct bpf_insn *buf;
> +       int cnt;
> +       bool result = true;
> +       bool check_expected = !is_null_insn(test->expected_insns);
> +       bool check_unexpected = !is_null_insn(test->unexpected_insns);
> +
> +       if (!check_expected && !check_unexpected)
> +               goto out;
> +
> +       if (get_xlated_program(fd_prog, &buf, &cnt)) {
> +               printf("FAIL: can't get xlated program\n");
> +               result = false;
> +               goto out;
> +       }
> +
> +       if (check_expected &&
> +           !find_all_insn_subseqs(buf, test->expected_insns,
> +                                  cnt, MAX_EXPECTED_INSNS)) {
> +               printf("FAIL: can't find expected subsequence of instructions\n");
> +               result = false;
> +               if (verbose) {
> +                       printf("Program:\n");
> +                       print_insn(buf, cnt);
> +                       printf("Expected subsequence:\n");
> +                       print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
> +               }
> +       }
> +
> +       if (check_unexpected &&
> +           find_all_insn_subseqs(buf, test->unexpected_insns,
> +                                 cnt, MAX_UNEXPECTED_INSNS)) {

I wonder whether we want different logic for unexpected_insns. With multiple
sub sequences, say seq-A and seq-B, it is more natural to reject any results
with either seq-A or seq-B. However, current logic will reject seq-A => seq-B,
but will accept seq-B => seq-A. Does this make sense?

> +               printf("FAIL: found unexpected subsequence of instructions\n");
> +               result = false;
> +               if (verbose) {
> +                       printf("Program:\n");
> +                       print_insn(buf, cnt);
> +                       printf("Un-expected subsequence:\n");
> +                       print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
> +               }
> +       }
> +
> +       free(buf);
> + out:
> +       return result;
> +}
> +
>  static void do_test_single(struct bpf_test *test, bool unpriv,
>                            int *passes, int *errors)
>  {
> @@ -1262,6 +1481,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
>         if (verbose)
>                 printf(", verifier log:\n%s", bpf_vlog);
>
> +       if (!check_xlated_program(test, fd_prog))
> +               goto fail_log;
> +
>         run_errs = 0;
>         run_successes = 0;
>         if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
> --
> 2.25.1
>
Eduard Zingerman June 3, 2022, 10:08 p.m. UTC | #2
> On Fri, 2022-06-03 at 14:31 -0700, Song Liu wrote:
> > On Fri, Jun 3, 2022 at 7:11 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> 
> > +#define INSN_OFF_MASK  ((s16)0xFFFF)
> > +#define INSN_IMM_MASK  ((s32)0xFFFFFFFF)
> 
> Shall we use __s16 and __s32 to match struct bpf_insn exactly.

Will do.

[...]

> > +       __u32 buf_elt_size = sizeof(**buf);
> 
> I guess elt means "element"? I would recommend use sizeof(struct bpf_insn)
> directly.

Will do.

[...]

> > +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
> > +{
> > +       for (int i = 0; i < max_len; ++i) {
> 
> Sorry for missing this in v1. We should really pull variable
> declaration out, like
> 
> int i;
> 
> for (int i = 0; ...)

Sorry, just to clarify, you want me to pull all loop counter
declarations to the top of the relevant functions, right? (Affecting 4
functions in this patch).

[...]

> > +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
> > +       if (check_unexpected &&
> > +           find_all_insn_subseqs(buf, test->unexpected_insns,
> > +                                 cnt, MAX_UNEXPECTED_INSNS)) {
> 
> I wonder whether we want different logic for unexpected_insns. With multiple
> sub sequences, say seq-A and seq-B, it is more natural to reject any results
> with either seq-A or seq-B. However, current logic will reject seq-A => seq-B,
> but will accept seq-B => seq-A. Does this make sense?

Have no strong opinion on this topic. In theory different variants
might be useful in different cases.

In the test cases for bpf_loop inlining I only had to match a single
unexpected instruction, so I opted to use same match function in both
expected and unexpected cases to keep things simple.

One thought that I had was that struct bpf_test might be extended in
the future as follows:

#define MAX_PATTERNS 4
...
struct bpf_test {
	...
	struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS][MAX_PATTERNS];
	...
}

Where each pattern follows logic "seq-A => seq-B", but patterns are
matched independently. So if the goal is to match either "seq-A" or
"seq-B" these should be specified as separate patterns. However, this
seems to be an overkill for the problem at hand.
Song Liu June 3, 2022, 11:01 p.m. UTC | #3
On Fri, Jun 3, 2022 at 3:08 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
[...]

> >
> > for (int i = 0; ...)
>
> Sorry, just to clarify, you want me to pull all loop counter
> declarations to the top of the relevant functions, right? (Affecting 4
> functions in this patch).

Right. There are a few more in later patches.

>
> [...]
>
> > > +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
> > > +       if (check_unexpected &&
> > > +           find_all_insn_subseqs(buf, test->unexpected_insns,
> > > +                                 cnt, MAX_UNEXPECTED_INSNS)) {
> >
> > I wonder whether we want different logic for unexpected_insns. With multiple
> > sub sequences, say seq-A and seq-B, it is more natural to reject any results
> > with either seq-A or seq-B. However, current logic will reject seq-A => seq-B,
> > but will accept seq-B => seq-A. Does this make sense?
>
> Have no strong opinion on this topic. In theory different variants
> might be useful in different cases.
>
> In the test cases for bpf_loop inlining I only had to match a single
> unexpected instruction, so I opted to use same match function in both
> expected and unexpected cases to keep things simple.

I think we can wait until we have an actual use case for multiple seq.
For now, let's keep current logic and document this clearly in struct
bpf_test.

Thanks,
Song
[...]
Eduard Zingerman June 4, 2022, 12:53 p.m. UTC | #4
Hi Song,

> Right. There are a few more in later patches.

Understood, thanks for the review, I'll apply the suggested fixes.

Thanks,
Eduard
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 372579c9f45e..373f7661f4d0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -51,6 +51,8 @@ 
 #endif
 
 #define MAX_INSNS	BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS	32
+#define MAX_UNEXPECTED_INSNS	32
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
 #define MAX_NR_MAPS	23
@@ -58,6 +60,10 @@ 
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
 
+#define INSN_OFF_MASK	((s16)0xFFFF)
+#define INSN_IMM_MASK	((s32)0xFFFFFFFF)
+#define SKIP_INSNS()	BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
@@ -79,6 +85,19 @@  struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
 	struct bpf_insn	*fill_insns;
+	/* If specified, test engine looks for this sequence of
+	 * instructions in the BPF program after loading. Allows to
+	 * test rewrites applied by verifier.  Use values
+	 * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm`
+	 * fields if content does not matter.  The test case fails if
+	 * specified instructions are not found.
+	 *
+	 * The sequence could be split into sub-sequences by adding
+	 * SKIP_INSNS instruction at the end of each sub-sequence. In
+	 * such case sub-sequences are searched for one after another.
+	 */
+	struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+	struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
 	int fixup_map_hash_8b[MAX_FIXUPS];
 	int fixup_map_hash_48b[MAX_FIXUPS];
 	int fixup_map_hash_16b[MAX_FIXUPS];
@@ -1126,6 +1145,206 @@  static bool cmp_str_seq(const char *log, const char *exp)
 	return true;
 }
 
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 xlated_prog_len;
+	__u32 buf_elt_size = sizeof(**buf);
+
+	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("bpf_obj_get_info_by_fd failed");
+		return -1;
+	}
+
+	xlated_prog_len = info.xlated_prog_len;
+	if (xlated_prog_len % buf_elt_size) {
+		printf("Program length %d is not multiple of %d\n",
+		       xlated_prog_len, buf_elt_size);
+		return -1;
+	}
+
+	*cnt = xlated_prog_len / buf_elt_size;
+	*buf = calloc(*cnt, buf_elt_size);
+	if (!buf) {
+		perror("can't allocate xlated program buffer");
+		return -ENOMEM;
+	}
+
+	bzero(&info, sizeof(info));
+	info.xlated_prog_len = xlated_prog_len;
+	info.xlated_prog_insns = (__u64)*buf;
+	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("second bpf_obj_get_info_by_fd failed");
+		goto out_free_buf;
+	}
+
+	return 0;
+
+out_free_buf:
+	free(*buf);
+	return -1;
+}
+
+static bool is_null_insn(struct bpf_insn *insn)
+{
+	struct bpf_insn null_insn = {};
+
+	return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+	struct bpf_insn skip_insn = SKIP_INSNS();
+
+	return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+	for (int i = 0; i < max_len; ++i) {
+		if (is_null_insn(&seq[i]))
+			return i;
+	}
+	return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+	struct bpf_insn orig_masked;
+
+	memcpy(&orig_masked, orig, sizeof(orig_masked));
+	if (masked->imm == INSN_IMM_MASK)
+		orig_masked.imm = INSN_IMM_MASK;
+	if (masked->off == INSN_OFF_MASK)
+		orig_masked.off = INSN_OFF_MASK;
+
+	return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+			    int seq_len, int subseq_len)
+{
+	if (subseq_len > seq_len)
+		return -1;
+
+	for (int i = 0; i < seq_len - subseq_len + 1; ++i) {
+		bool found = true;
+
+		for (int j = 0; j < subseq_len; ++j) {
+			if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+				found = false;
+				break;
+			}
+		}
+		if (found)
+			return i;
+	}
+
+	return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+	for (int i = 0; i < len; ++i)
+		if (is_skip_insn(&seq[i]))
+			return i;
+
+	return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` could be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * nil instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+				  int seq_len, int max_subseqs_len)
+{
+	int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+	while (subseqs_len > 0) {
+		int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+		int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+		int subseq_idx = find_insn_subseq(seq, subseqs,
+						  seq_len, cur_subseq_len);
+
+		if (subseq_idx < 0)
+			return false;
+		seq += subseq_idx + cur_subseq_len;
+		seq_len -= subseq_idx + cur_subseq_len;
+		subseqs += cur_subseq_len + 1;
+		subseqs_len -= cur_subseq_len + 1;
+	}
+
+	return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+	printf("  addr  op d s off  imm\n");
+	for (int i = 0; i < cnt; ++i) {
+		struct bpf_insn *insn = &buf[i];
+
+		if (is_null_insn(insn))
+			break;
+
+		if (is_skip_insn(insn))
+			printf("  ...\n");
+		else
+			printf("  %04x: %02x %1x %x %04hx %08x\n",
+			       i, insn->code, insn->dst_reg,
+			       insn->src_reg, insn->off, insn->imm);
+	}
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+	struct bpf_insn *buf;
+	int cnt;
+	bool result = true;
+	bool check_expected = !is_null_insn(test->expected_insns);
+	bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+	if (!check_expected && !check_unexpected)
+		goto out;
+
+	if (get_xlated_program(fd_prog, &buf, &cnt)) {
+		printf("FAIL: can't get xlated program\n");
+		result = false;
+		goto out;
+	}
+
+	if (check_expected &&
+	    !find_all_insn_subseqs(buf, test->expected_insns,
+				   cnt, MAX_EXPECTED_INSNS)) {
+		printf("FAIL: can't find expected subsequence of instructions\n");
+		result = false;
+		if (verbose) {
+			printf("Program:\n");
+			print_insn(buf, cnt);
+			printf("Expected subsequence:\n");
+			print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+		}
+	}
+
+	if (check_unexpected &&
+	    find_all_insn_subseqs(buf, test->unexpected_insns,
+				  cnt, MAX_UNEXPECTED_INSNS)) {
+		printf("FAIL: found unexpected subsequence of instructions\n");
+		result = false;
+		if (verbose) {
+			printf("Program:\n");
+			print_insn(buf, cnt);
+			printf("Un-expected subsequence:\n");
+			print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+		}
+	}
+
+	free(buf);
+ out:
+	return result;
+}
+
 static void do_test_single(struct bpf_test *test, bool unpriv,
 			   int *passes, int *errors)
 {
@@ -1262,6 +1481,9 @@  static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (verbose)
 		printf(", verifier log:\n%s", bpf_vlog);
 
+	if (!check_xlated_program(test, fd_prog))
+		goto fail_log;
+
 	run_errs = 0;
 	run_successes = 0;
 	if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {