diff mbox series

[mptcp-next,v2,3/5] Squash to "mptcp: add get_subflow wrappers"

Message ID e0034f846f86901bd0c996bca6693c4036602750.1653305364.git.geliang.tang@suse.com (mailing list archive)
State Superseded, archived
Headers show
Series BPF packet scheduler | expand

Checks

Context Check Description
matttbe/build fail Build error with: -Werror
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 76 lines checked
matttbe/KVM_Validation__normal warning Unstable: 1 failed test(s): selftest_mptcp_join

Commit Message

Geliang Tang May 23, 2022, 11:33 a.m. UTC
Please update the commit log:

'''
This patch defines two new wrappers mptcp_sched_get_send() and
mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
Use them instead of using mptcp_subflow_get_send() or
mptcp_subflow_get_retrans() directly.

Set the subflow pointers array in struct mptcp_sched_data before invoking
get_subflow(), then it can be used in get_subflow() in the BPF contexts.

Get the return bitmap of get_subflow() and test which subflow or subflows
are picked by the scheduler.
'''

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

Comments

Mat Martineau May 24, 2022, 1:01 a.m. UTC | #1
On Mon, 23 May 2022, Geliang Tang wrote:

> Please update the commit log:
>
> '''
> This patch defines two new wrappers mptcp_sched_get_send() and
> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> Use them instead of using mptcp_subflow_get_send() or
> mptcp_subflow_get_retrans() directly.
>
> Set the subflow pointers array in struct mptcp_sched_data before invoking
> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>
> Get the return bitmap of get_subflow() and test which subflow or subflows
> are picked by the scheduler.
> '''
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 3ceb721e6489..0ef805c489ab 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> static int mptcp_sched_data_init(struct mptcp_sock *msk,
> 				 struct mptcp_sched_data *data)
> {
> -	data->sock = NULL;
> -	data->call_again = 0;
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		if (i == MPTCP_SUBFLOWS_MAX) {
> +			pr_warn_once("too many subflows");
> +			break;
> +		}
> +		data->contexts[i++] = subflow;
> +	}
> +
> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> +		data->contexts[i++] = NULL;
>
> 	return 0;
> }
> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	unsigned long bitmap;
> +	int i;
>
> 	sock_owned_by_me((struct sock *)msk);
>
> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> 		return mptcp_subflow_get_send(msk);
>
> 	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, false, &data);
> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}
> +
> +	return ssk;

The commit that this gets squashed to also ignores call_again, so is this 
code that just returns the ssk for the first bit in the bitmap also 
placeholder code?


It also seems like correlating the bitmap bits with the data.contexts array 
makes the bitmap require extra work. What do you think about using an 
array instead, like:

struct mptcp_sched_data {
        struct mptcp_subflow_context *context;
        bool is_scheduled;
};

And passing an array of that struct to the BPF code? Then the is_scheduled 
flag could be set for the corresponding subflow.

Do you think that array-based API would be clearer than the bitmap to 
someone writing a BPF scheduler?


- Mat


> }
>
> struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	unsigned long bitmap;
> +	int i;
>
> 	sock_owned_by_me((const struct sock *)msk);
>
> @@ -134,8 +158,15 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> 		return mptcp_subflow_get_retrans(msk);
>
> 	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, true, &data);
> +	bitmap = msk->sched->get_subflow(msk, true, &data);
> +
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}
>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	return ssk;
> }
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel
Geliang Tang May 26, 2022, 12:18 p.m. UTC | #2
Hi Mat,

On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
> On Mon, 23 May 2022, Geliang Tang wrote:
> 
> > Please update the commit log:
> > 
> > '''
> > This patch defines two new wrappers mptcp_sched_get_send() and
> > mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> > Use them instead of using mptcp_subflow_get_send() or
> > mptcp_subflow_get_retrans() directly.
> > 
> > Set the subflow pointers array in struct mptcp_sched_data before invoking
> > get_subflow(), then it can be used in get_subflow() in the BPF contexts.
> > 
> > Get the return bitmap of get_subflow() and test which subflow or subflows
> > are picked by the scheduler.
> > '''
> > 
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 39 insertions(+), 8 deletions(-)
> > 
> > diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> > index 3ceb721e6489..0ef805c489ab 100644
> > --- a/net/mptcp/sched.c
> > +++ b/net/mptcp/sched.c
> > @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> > static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > 				 struct mptcp_sched_data *data)
> > {
> > -	data->sock = NULL;
> > -	data->call_again = 0;
> > +	struct mptcp_subflow_context *subflow;
> > +	int i = 0;
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> > +		if (i == MPTCP_SUBFLOWS_MAX) {
> > +			pr_warn_once("too many subflows");
> > +			break;
> > +		}
> > +		data->contexts[i++] = subflow;
> > +	}
> > +
> > +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> > +		data->contexts[i++] = NULL;
> > 
> > 	return 0;
> > }
> > @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > {
> > 	struct mptcp_sched_data data;
> > +	struct sock *ssk = NULL;
> > +	unsigned long bitmap;
> > +	int i;
> > 
> > 	sock_owned_by_me((struct sock *)msk);
> > 
> > @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > 		return mptcp_subflow_get_send(msk);
> > 
> > 	mptcp_sched_data_init(msk, &data);
> > -	msk->sched->get_subflow(msk, false, &data);
> > +	bitmap = msk->sched->get_subflow(msk, false, &data);
> > 
> > -	msk->last_snd = data.sock;
> > -	return data.sock;
> > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > +			ssk = data.contexts[i]->tcp_sock;
> > +			msk->last_snd = ssk;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return ssk;
> 
> The commit that this gets squashed too also ignores call_again, so is this
> code that just returns the ssk for the first bit in the bitmap also
> placeholder code?

Yes. Since the redundant scheduler is still under development and there's
still a lot of work to be done, I plan to support single subflow schedulers
in this series first. The multiple subflows schedulers will be added later.

> 
> 
> It also seems like correlate the bitmap bits with the data.contexts array
> makes the bitmap require extra work. What do you think about using an array
> instead, like:
> 
> struct mptcp_sched_data {
>        struct mptcp_subflow_context *context;
>        bool is_scheduled;
> };
> 
> And passing an array of that struct to the BPF code? Then the is_scheduled
> flag could be set for the corresponding subflow.
> 
> Do you think that array-based API would be clearer than the bitmap to
> someone writing a BPF scheduler?

I tried to implement this array-based API, but it's not going well. Array
parameters are not easily supported in BPF functions. And the write access
permissions of array members are not easy to allow in BPF. I haven't found
a solution to these two issues yet. Here are the code and error logs in the
attachment.

Thanks,
-Geliang

> 
> 
> - Mat
> 
> 
> > }
> > 
> > struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> > {
> > 	struct mptcp_sched_data data;
> > +	struct sock *ssk = NULL;
> > +	unsigned long bitmap;
> > +	int i;
> > 
> > 	sock_owned_by_me((const struct sock *)msk);
> > 
> > @@ -134,8 +158,15 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> > 		return mptcp_subflow_get_retrans(msk);
> > 
> > 	mptcp_sched_data_init(msk, &data);
> > -	msk->sched->get_subflow(msk, true, &data);
> > +	bitmap = msk->sched->get_subflow(msk, true, &data);
> > +
> > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > +			ssk = data.contexts[i]->tcp_sock;
> > +			msk->last_snd = ssk;
> > +			break;
> > +		}
> > +	}
> > 
> > -	msk->last_snd = data.sock;
> > -	return data.sock;
> > +	return ssk;
> > }
> > -- 
> > 2.34.1
> > 
> > 
> > 
> 
> --
> Mat Martineau
> Intel
>
#106 mptcp:FAIL
test_base:PASS:test__join_cgroup 0 nsec
test_base:PASS:start_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_tsk:PASS:bpf_map_lookup_elem 0 nsec
verify_tsk:PASS:unexpected invoked count 0 nsec
verify_tsk:PASS:unexpected is_mptcp 0 nsec
test_base:PASS:run_test tcp 0 nsec
test_base:PASS:start_mptcp_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_msk:PASS:invalid token 0 nsec
get_msk_ca_name:PASS:failed to open tcp_congestion_control 0 nsec
get_msk_ca_name:PASS:failed to read ca_name 0 nsec
verify_msk:PASS:bpf_map_lookup_elem 0 nsec
verify_msk:PASS:unexpected invoked count 0 nsec
verify_msk:PASS:unexpected is_mptcp 0 nsec
verify_msk:PASS:unexpected token 0 nsec
verify_msk:PASS:unexpected first 0 nsec
verify_msk:PASS:unexpected ca_name 0 nsec
test_base:PASS:run_test mptcp 0 nsec
#106/1 mptcp/base:OK
test_first:PASS:bpf_first__open_and_load 0 nsec
test_first:PASS:bpf_map__attach_struct_ops 0 nsec
send_data:PASS:pthread_create 0 nsec
server:PASS:send 0 nsec
send_data:PASS:recv 0 nsec
send_data:PASS:pthread_join 0 nsec
#106/2 mptcp/first:OK
libbpf: prog 'bpf_rr_get_subflow': BPF program load failed: Permission denied
libbpf: prog 'bpf_rr_get_subflow': -- BEGIN PROG LOAD LOG --
R1 type=ctx expected=fp
0: R1=ctx(off=0,imm=0) R10=fp0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
0: (b7) r3 = 0                        ; R3_w=0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
1: (79) r2 = *(u64 *)(r1 +16)
func 'get_subflow' arg2 has btf_id 27881 type STRUCT 'mptcp_sched_data'
2: R1=ctx(off=0,imm=0) R2_w=ptr_mptcp_sched_data(off=0,imm=0)
2: (79) r1 = *(u64 *)(r1 +0)
func 'get_subflow' arg0 has btf_id 137236 type STRUCT 'mptcp_sock'
3: R1_w=ptr_mptcp_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
3: (79) r4 = *(u64 *)(r1 +1448)       ; R1_w=ptr_mptcp_sock(off=0,imm=0) R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
4: (15) if r4 == 0x0 goto pc+44       ; R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
5: (79) r5 = *(u64 *)(r2 +0)          ; R2_w=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
6: (15) if r5 == 0x0 goto pc+42       ; R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
7: (b7) r4 = 1                        ; R4_w=1
; if (contexts[i].context->tcp_sock == msk->last_snd) {
8: (79) r1 = *(u64 *)(r1 +1448)       ; R1_w=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
9: (79) r5 = *(u64 *)(r5 +176)        ; R5=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
10: (5d) if r5 != r1 goto pc+8        ; R1=ptr_sock(off=0,imm=0) R5=ptr_sock(off=0,imm=0)
; if (i + 1 == MPTCP_SUBFLOWS_MAX || !contexts[i + 1].context)
11: (bf) r1 = r4                      ; R1_w=1 R4=1
12: (67) r1 <<= 4                     ; R1_w=16
13: (bf) r5 = r2                      ; R2=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_sched_data(off=0,imm=0)
14: (0f) r5 += r1                     ; R1_w=P16 R5_w=ptr_mptcp_sched_data(off=16,imm=0)
15: (79) r1 = *(u64 *)(r5 +0)
access beyond struct mptcp_sched_data at off 16 size 8
processed 16 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: failed to load program 'bpf_rr_get_subflow'
libbpf: failed to load object 'mptcp_bpf_rr'
libbpf: failed to load BPF skeleton 'mptcp_bpf_rr': -13
test_rr:FAIL:bpf_rr__open_and_load unexpected error: -13
#106/3 mptcp/rr:FAIL

All error logs:
#106 mptcp:FAIL
test_base:PASS:test__join_cgroup 0 nsec
test_base:PASS:start_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_tsk:PASS:bpf_map_lookup_elem 0 nsec
verify_tsk:PASS:unexpected invoked count 0 nsec
verify_tsk:PASS:unexpected is_mptcp 0 nsec
test_base:PASS:run_test tcp 0 nsec
test_base:PASS:start_mptcp_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_msk:PASS:invalid token 0 nsec
get_msk_ca_name:PASS:failed to open tcp_congestion_control 0 nsec
get_msk_ca_name:PASS:failed to read ca_name 0 nsec
verify_msk:PASS:bpf_map_lookup_elem 0 nsec
verify_msk:PASS:unexpected invoked count 0 nsec
verify_msk:PASS:unexpected is_mptcp 0 nsec
verify_msk:PASS:unexpected token 0 nsec
verify_msk:PASS:unexpected first 0 nsec
verify_msk:PASS:unexpected ca_name 0 nsec
test_base:PASS:run_test mptcp 0 nsec
#106/1 mptcp/base:OK
test_first:PASS:bpf_first__open_and_load 0 nsec
test_first:PASS:bpf_map__attach_struct_ops 0 nsec
send_data:PASS:pthread_create 0 nsec
server:PASS:send 0 nsec
send_data:PASS:recv 0 nsec
send_data:PASS:pthread_join 0 nsec
#106/2 mptcp/first:OK
libbpf: prog 'bpf_rr_get_subflow': BPF program load failed: Permission denied
libbpf: prog 'bpf_rr_get_subflow': -- BEGIN PROG LOAD LOG --
R1 type=ctx expected=fp
0: R1=ctx(off=0,imm=0) R10=fp0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
0: (b7) r3 = 0                        ; R3_w=0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
1: (79) r2 = *(u64 *)(r1 +16)
func 'get_subflow' arg2 has btf_id 27881 type STRUCT 'mptcp_sched_data'
2: R1=ctx(off=0,imm=0) R2_w=ptr_mptcp_sched_data(off=0,imm=0)
2: (79) r1 = *(u64 *)(r1 +0)
func 'get_subflow' arg0 has btf_id 137236 type STRUCT 'mptcp_sock'
3: R1_w=ptr_mptcp_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
3: (79) r4 = *(u64 *)(r1 +1448)       ; R1_w=ptr_mptcp_sock(off=0,imm=0) R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
4: (15) if r4 == 0x0 goto pc+44       ; R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
5: (79) r5 = *(u64 *)(r2 +0)          ; R2_w=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
6: (15) if r5 == 0x0 goto pc+42       ; R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
7: (b7) r4 = 1                        ; R4_w=1
; if (contexts[i].context->tcp_sock == msk->last_snd) {
8: (79) r1 = *(u64 *)(r1 +1448)       ; R1_w=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
9: (79) r5 = *(u64 *)(r5 +176)        ; R5=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
10: (5d) if r5 != r1 goto pc+8        ; R1=ptr_sock(off=0,imm=0) R5=ptr_sock(off=0,imm=0)
; if (i + 1 == MPTCP_SUBFLOWS_MAX || !contexts[i + 1].context)
11: (bf) r1 = r4                      ; R1_w=1 R4=1
12: (67) r1 <<= 4                     ; R1_w=16
13: (bf) r5 = r2                      ; R2=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_sched_data(off=0,imm=0)
14: (0f) r5 += r1                     ; R1_w=P16 R5_w=ptr_mptcp_sched_data(off=16,imm=0)
15: (79) r1 = *(u64 *)(r5 +0)
access beyond struct mptcp_sched_data at off 16 size 8
processed 16 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: failed to load program 'bpf_rr_get_subflow'
libbpf: failed to load object 'mptcp_bpf_rr'
libbpf: failed to load BPF skeleton 'mptcp_bpf_rr': -13
test_rr:FAIL:bpf_rr__open_and_load unexpected error: -13
#106/3 mptcp/rr:FAIL
Summary: 0/2 PASSED, 0 SKIPPED, 1 FAILED
Mat Martineau May 26, 2022, 11:48 p.m. UTC | #3
On Thu, 26 May 2022, Geliang Tang wrote:

> Hi Mat,
>
> On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
>> On Mon, 23 May 2022, Geliang Tang wrote:
>>
>>> Please update the commit log:
>>>
>>> '''
>>> This patch defines two new wrappers mptcp_sched_get_send() and
>>> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
>>> Use them instead of using mptcp_subflow_get_send() or
>>> mptcp_subflow_get_retrans() directly.
>>>
>>> Set the subflow pointers array in struct mptcp_sched_data before invoking
>>> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>>>
>>> Get the return bitmap of get_subflow() and test which subflow or subflows
>>> are picked by the scheduler.
>>> '''
>>>
>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>> ---
>>> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
>>> 1 file changed, 39 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
>>> index 3ceb721e6489..0ef805c489ab 100644
>>> --- a/net/mptcp/sched.c
>>> +++ b/net/mptcp/sched.c
>>> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
>>> static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>> 				 struct mptcp_sched_data *data)
>>> {
>>> -	data->sock = NULL;
>>> -	data->call_again = 0;
>>> +	struct mptcp_subflow_context *subflow;
>>> +	int i = 0;
>>> +
>>> +	mptcp_for_each_subflow(msk, subflow) {
>>> +		if (i == MPTCP_SUBFLOWS_MAX) {
>>> +			pr_warn_once("too many subflows");
>>> +			break;
>>> +		}
>>> +		data->contexts[i++] = subflow;
>>> +	}
>>> +
>>> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
>>> +		data->contexts[i++] = NULL;
>>>
>>> 	return 0;
>>> }
>>> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>> {
>>> 	struct mptcp_sched_data data;
>>> +	struct sock *ssk = NULL;
>>> +	unsigned long bitmap;
>>> +	int i;
>>>
>>> 	sock_owned_by_me((struct sock *)msk);
>>>
>>> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>> 		return mptcp_subflow_get_send(msk);
>>>
>>> 	mptcp_sched_data_init(msk, &data);
>>> -	msk->sched->get_subflow(msk, false, &data);
>>> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>>>
>>> -	msk->last_snd = data.sock;
>>> -	return data.sock;
>>> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
>>> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
>>> +			ssk = data.contexts[i]->tcp_sock;
>>> +			msk->last_snd = ssk;
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	return ssk;
>>
>> The commit that this gets squashed too also ignores call_again, so is this
>> code that just returns the ssk for the first bit in the bitmap also
>> placeholder code?
>
> Yes. Since the redundant scheduler is still under development and there's
> still a lot of work to be done, I plan to support single subflow schedulers
> in this series first. The multiple subflows schedulers will be added later.
>
>>
>>
>> It also seems like correlate the bitmap bits with the data.contexts array
>> makes the bitmap require extra work. What do you think about using an array
>> instead, like:
>>
>> struct mptcp_sched_data {
>>        struct mptcp_subflow_context *context;
>>        bool is_scheduled;
>> };
>>
>> And passing an array of that struct to the BPF code? Then the is_scheduled
>> flag could be set for the corresponding subflow.
>>
>> Do you think that array-based API would be clearer than the bitmap to
>> someone writing a BPF scheduler?
>
> I tried to implement this array-based API, but it's not going well. Array
> parameters are not easily supported in BPF functions. And the write access
> permissions of array members is not easy to allow in BPF. I haven't found
> a solution to these two issues yet. Here are codes and error logs in the
> attachment.
>

Yeah, after looking at your logs and trying a few experiments, I 
definitely agree that array parameters are not well supported by the BPF 
verifier.

It looks like the bpf verifier was inspecting the args for get_subflow in 
mptcp_sched_ops:

void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
 		    struct mptcp_sched_data contexts[]);

and thinking 'contexts' was a pointer to a single struct mptcp_sched_data 
instance, instead of an array. The verifier can't guarantee safe access 
for a variable-length array so that does make some sense.

I tried changing the code to:

void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);

so the third arg was a "pointer to array of structs, with 
MPTCP_SUBFLOWS_MAX elements in the array". The verifier didn't like that 
either:

"""
func 'get_subflow' arg2 type ARRAY is not a struct
"""

That error message is printed by btf_ctx_access(). It might be possible to 
customize bpf_mptcp_sched_verifier_ops to handle that, but it seems 
complicated.


We could instead use mptcp_sched_data to contain all the parameters 
(including an array of structs):

struct mptcp_sched_subflow {
 	struct mptcp_subflow_context *context;
 	bool is_scheduled;
};

struct mptcp_sched_data {
 	/* Moving the msk and reinject args here is optional, but
 	 * it seemed like a good way to group all of the data
 	 * for a bpf scheduler to use */
 	const struct mptcp_sock *msk;
 	bool reinject;
 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
};

struct mptcp_sched_ops {
 	void (*get_subflow)(struct mptcp_sched_data *data);

 	char			name[MPTCP_SCHED_NAME_MAX];
 	struct module		*owner;
 	struct list_head	list;

 	void (*init)(const struct mptcp_sock *msk);
 	void (*release)(const struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;

It looks like btf_struct_access() and btf_struct_walk() know how to handle 
an array *inside* a struct, so MPTCP would not need as much custom 
verifier code. This seems like a better fit than my array idea - hopefully 
it's more workable.

Do you think this seems like a reasonable interface for BPF 
scheduler code?


--
Mat Martineau
Intel
Geliang Tang May 27, 2022, 3:27 p.m. UTC | #4
Hi Mat,

On Thu, May 26, 2022 at 04:48:41PM -0700, Mat Martineau wrote:
> On Thu, 26 May 2022, Geliang Tang wrote:
> 
> > Hi Mat,
> > 
> > On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
> > > On Mon, 23 May 2022, Geliang Tang wrote:
> > > 
> > > > Please update the commit log:
> > > > 
> > > > '''
> > > > This patch defines two new wrappers mptcp_sched_get_send() and
> > > > mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> > > > Use them instead of using mptcp_subflow_get_send() or
> > > > mptcp_subflow_get_retrans() directly.
> > > > 
> > > > Set the subflow pointers array in struct mptcp_sched_data before invoking
> > > > get_subflow(), then it can be used in get_subflow() in the BPF contexts.
> > > > 
> > > > Get the return bitmap of get_subflow() and test which subflow or subflows
> > > > are picked by the scheduler.
> > > > '''
> > > > 
> > > > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > > > ---
> > > > net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> > > > 1 file changed, 39 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> > > > index 3ceb721e6489..0ef805c489ab 100644
> > > > --- a/net/mptcp/sched.c
> > > > +++ b/net/mptcp/sched.c
> > > > @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> > > > static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > > > 				 struct mptcp_sched_data *data)
> > > > {
> > > > -	data->sock = NULL;
> > > > -	data->call_again = 0;
> > > > +	struct mptcp_subflow_context *subflow;
> > > > +	int i = 0;
> > > > +
> > > > +	mptcp_for_each_subflow(msk, subflow) {
> > > > +		if (i == MPTCP_SUBFLOWS_MAX) {
> > > > +			pr_warn_once("too many subflows");
> > > > +			break;
> > > > +		}
> > > > +		data->contexts[i++] = subflow;
> > > > +	}
> > > > +
> > > > +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> > > > +		data->contexts[i++] = NULL;
> > > > 
> > > > 	return 0;
> > > > }
> > > > @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > > > struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > > > {
> > > > 	struct mptcp_sched_data data;
> > > > +	struct sock *ssk = NULL;
> > > > +	unsigned long bitmap;
> > > > +	int i;
> > > > 
> > > > 	sock_owned_by_me((struct sock *)msk);
> > > > 
> > > > @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > > > 		return mptcp_subflow_get_send(msk);
> > > > 
> > > > 	mptcp_sched_data_init(msk, &data);
> > > > -	msk->sched->get_subflow(msk, false, &data);
> > > > +	bitmap = msk->sched->get_subflow(msk, false, &data);
> > > > 
> > > > -	msk->last_snd = data.sock;
> > > > -	return data.sock;
> > > > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > > > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > > > +			ssk = data.contexts[i]->tcp_sock;
> > > > +			msk->last_snd = ssk;
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	return ssk;
> > > 
> > > The commit that this gets squashed too also ignores call_again, so is this
> > > code that just returns the ssk for the first bit in the bitmap also
> > > placeholder code?
> > 
> > Yes. Since the redundant scheduler is still under development and there's
> > still a lot of work to be done, I plan to support single subflow schedulers
> > in this series first. The multiple subflows schedulers will be added later.
> > 
> > > 
> > > 
> > > It also seems like correlate the bitmap bits with the data.contexts array
> > > makes the bitmap require extra work. What do you think about using an array
> > > instead, like:
> > > 
> > > struct mptcp_sched_data {
> > >        struct mptcp_subflow_context *context;
> > >        bool is_scheduled;
> > > };
> > > 
> > > And passing an array of that struct to the BPF code? Then the is_scheduled
> > > flag could be set for the corresponding subflow.
> > > 
> > > Do you think that array-based API would be clearer than the bitmap to
> > > someone writing a BPF scheduler?
> > 
> > I tried to implement this array-based API, but it's not going well. Array
> > parameters are not easily supported in BPF functions. And the write access
> > permissions of array members is not easy to allow in BPF. I haven't found
> > a solution to these two issues yet. Here are codes and error logs in the
> > attachment.
> > 
> 
> Yeah, after looking at your logs and trying a few experiments, I definitely
> agree that array parameters are not well supported by the BPF verifies.
> 
> It looks like the bpf verifier was inspecting the args for get_subflow in
> mptcp_sched_ops:
> 
> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
> 		    struct mptcp_sched_data contexts[]);
> 
> and thinking 'contexts' was a pointer to a single struct mptcp_sched_data
> instance, instead of an array. The verifier can't guarantee safe access for
> a variable-length array so that does make some sense.
> 
> I tried changing the code to:
> 
> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);
> 
> so the third arg was a "pointer to array of structs, with MPTCP_SUBFLOWS_MAX
> elements in the array". The verifier didn't like that either:
> 
> """
> func 'get_subflow' arg2 type ARRAY is not a struct
> """
> 
> That error message is printed by btf_ctx_access(). It might be possible to
> customize bpf_mptcp_sched_verifier_ops to handle that, but it seems
> complicated.
> 
> 
> We could instead use mptcp_sched_data to contain all the parameters
> (including an array of structs):
> 
> struct mptcp_sched_subflow {
> 	struct mptcp_subflow_context *context;
> 	bool is_scheduled;
> };
> 
> struct mptcp_sched_data {
> 	/* Moving the msk and reinject args here is optional, but
> 	 * it seemed like a good way to group all of the data
> 	 * for a bpf scheduler to use */
> 	const struct mptcp_sock *msk;
> 	bool reinject;
> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
> };
> 
> struct mptcp_sched_ops {
> 	void (*get_subflow)(struct mptcp_sched_data *data);
> 
> 	char			name[MPTCP_SCHED_NAME_MAX];
> 	struct module		*owner;
> 	struct list_head	list;
> 
> 	void (*init)(const struct mptcp_sock *msk);
> 	void (*release)(const struct mptcp_sock *msk);
> } ____cacheline_aligned_in_smp;
> 
> It looks like btf_struct_access() and btf_struct_walk() know how to handle
> an array *inside* a struct, so MPTCP would not need as much custom verifier
> code. This seems like a better fit than my array idea - hopefully it's more
> workable.

It's hard to get the write access to is_scheduled in this case.

If we add another member bitmap in mptcp_sched_data like this:

struct mptcp_sched_subflow {
	struct mptcp_subflow_context *context;
	bool is_scheduled;
};

struct mptcp_sched_data {
	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
	const struct mptcp_sock *msk;
	bool reinject;
	unsigned bitmap;
};

It's easy to calculate the offset in bpf_mptcp_sched_btf_struct_access():

	switch (off) {
	case offsetof(struct mptcp_sched_data, bitmap):
		end = offsetofend(struct mptcp_sched_data, bitmap);
		break;

But it's hard to calculate the offsets of is_scheduled, we need to have
write access for 8 different offsets.

We may calculate them like this:

data->contexts[0].is_scheduled	offset = 0 + sizeof(struct mptcp_subflow_context *)
data->contexts[1].is_scheduled  offset = 1 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
data->contexts[2].is_scheduled  ...
data->contexts[3].is_scheduled  ...
data->contexts[4].is_scheduled  ...
data->contexts[5].is_scheduled  ...
data->contexts[6].is_scheduled  ...
data->contexts[7].is_scheduled  offset = 7 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)

But it doesn't work. I haven't found a solution yet.

Maybe we also need to consider the actual number of subflows, which makes
it more complicated.


If we make the array read only, just write the bitmap member or return a
bitmap, we can avoid dealing with these complex offsets.


Anyway, I will continue to solve this write access issue, but I also want
to hear your opinion.

Thanks,
-Geliang

> 
> Do you think this seems like a reasonable interface for BPF scheduler code?
> 
> 
> --
> Mat Martineau
> Intel
>
Mat Martineau May 27, 2022, 8:03 p.m. UTC | #5
On Fri, 27 May 2022, Geliang Tang wrote:

> Hi Mat,
>
> On Thu, May 26, 2022 at 04:48:41PM -0700, Mat Martineau wrote:
>> On Thu, 26 May 2022, Geliang Tang wrote:
>>
>>> Hi Mat,
>>>
>>> On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
>>>> On Mon, 23 May 2022, Geliang Tang wrote:
>>>>
>>>>> Please update the commit log:
>>>>>
>>>>> '''
>>>>> This patch defines two new wrappers mptcp_sched_get_send() and
>>>>> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
>>>>> Use them instead of using mptcp_subflow_get_send() or
>>>>> mptcp_subflow_get_retrans() directly.
>>>>>
>>>>> Set the subflow pointers array in struct mptcp_sched_data before invoking
>>>>> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>>>>>
>>>>> Get the return bitmap of get_subflow() and test which subflow or subflows
>>>>> are picked by the scheduler.
>>>>> '''
>>>>>
>>>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>>>> ---
>>>>> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
>>>>> 1 file changed, 39 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
>>>>> index 3ceb721e6489..0ef805c489ab 100644
>>>>> --- a/net/mptcp/sched.c
>>>>> +++ b/net/mptcp/sched.c
>>>>> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
>>>>> static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>>>> 				 struct mptcp_sched_data *data)
>>>>> {
>>>>> -	data->sock = NULL;
>>>>> -	data->call_again = 0;
>>>>> +	struct mptcp_subflow_context *subflow;
>>>>> +	int i = 0;
>>>>> +
>>>>> +	mptcp_for_each_subflow(msk, subflow) {
>>>>> +		if (i == MPTCP_SUBFLOWS_MAX) {
>>>>> +			pr_warn_once("too many subflows");
>>>>> +			break;
>>>>> +		}
>>>>> +		data->contexts[i++] = subflow;
>>>>> +	}
>>>>> +
>>>>> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
>>>>> +		data->contexts[i] = NULL;
>>>>>
>>>>> 	return 0;
>>>>> }
>>>>> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>>>> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>>>> {
>>>>> 	struct mptcp_sched_data data;
>>>>> +	struct sock *ssk = NULL;
>>>>> +	unsigned long bitmap;
>>>>> +	int i;
>>>>>
>>>>> 	sock_owned_by_me((struct sock *)msk);
>>>>>
>>>>> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>>>> 		return mptcp_subflow_get_send(msk);
>>>>>
>>>>> 	mptcp_sched_data_init(msk, &data);
>>>>> -	msk->sched->get_subflow(msk, false, &data);
>>>>> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>>>>>
>>>>> -	msk->last_snd = data.sock;
>>>>> -	return data.sock;
>>>>> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
>>>>> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
>>>>> +			ssk = data.contexts[i]->tcp_sock;
>>>>> +			msk->last_snd = ssk;
>>>>> +			break;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	return ssk;
>>>>
>>>> The commit that this gets squashed too also ignores call_again, so is this
>>>> code that just returns the ssk for the first bit in the bitmap also
>>>> placeholder code?
>>>
>>> Yes. Since the redundant scheduler is still under development and there's
>>> still a lot of work to be done, I plan to support single subflow schedulers
>>> in this series first. The multiple subflows schedulers will be added later.
>>>
>>>>
>>>>
>>>> It also seems like correlate the bitmap bits with the data.contexts array
>>>> makes the bitmap require extra work. What do you think about using an array
>>>> instead, like:
>>>>
>>>> struct mptcp_sched_data {
>>>>        struct mptcp_subflow_context *context;
>>>>        bool is_scheduled;
>>>> };
>>>>
>>>> And passing an array of that struct to the BPF code? Then the is_scheduled
>>>> flag could be set for the corresponding subflow.
>>>>
>>>> Do you think that array-based API would be clearer than the bitmap to
>>>> someone writing a BPF scheduler?
>>>
>>> I tried to implement this array-based API, but it's not going well. Array
>>> parameters are not easily supported in BPF functions. And the write access
>>> permissions of array members is not easy to allow in BPF. I haven't found
>>> a solution to these two issues yet. Here are codes and error logs in the
>>> attachment.
>>>
>>
>> Yeah, after looking at your logs and trying a few experiments, I definitely
>> agree that array parameters are not well supported by the BPF verifier.
>>
>> It looks like the bpf verifier was inspecting the args for get_subflow in
>> mptcp_sched_ops:
>>
>> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>> 		    struct mptcp_sched_data contexts[]);
>>
>> and thinking 'contexts' was a pointer to a single struct mptcp_sched_data
>> instance, instead of an array. The verifier can't guarantee safe access for
>> a variable-length array so that does make some sense.
>>
>> I tried changing the code to:
>>
>> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>>                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);
>>
>> so the third arg was a "pointer to array of structs, with MPTCP_SUBFLOWS_MAX
>> elements in the array". The verifier didn't like that either:
>>
>> """
>> func 'get_subflow' arg2 type ARRAY is not a struct
>> """
>>
>> That error message is printed by btf_ctx_access(). It might be possible to
>> customize bpf_mptcp_sched_verifier_ops to handle that, but it seems
>> complicated.
>>
>>
>> We could instead use mptcp_sched_data to contain all the parameters
>> (including an array of structs):
>>
>> struct mptcp_sched_subflow {
>> 	struct mptcp_subflow_context *context;
>> 	bool is_scheduled;
>> };
>>
>> struct mptcp_sched_data {
>> 	/* Moving the msk and reinject args here is optional, but
>> 	 * it seemed like a good way to group all of the data
>> 	 * for a bpf scheduler to use */
>> 	const struct mptcp_sock *msk;
>> 	bool reinject;
>> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
>> };
>>
>> struct mptcp_sched_ops {
>> 	void (*get_subflow)(struct mptcp_sched_data *data);
>>
>> 	char			name[MPTCP_SCHED_NAME_MAX];
>> 	struct module		*owner;
>> 	struct list_head	list;
>>
>> 	void (*init)(const struct mptcp_sock *msk);
>> 	void (*release)(const struct mptcp_sock *msk);
>> } ____cacheline_aligned_in_smp;
>>
>> It looks like btf_struct_access() and btf_struct_walk() know how to handle
>> an array *inside* a struct, so MPTCP would not need as much custom verifier
>> code. This seems like a better fit than my array idea - hopefully it's more
>> workable.
>
> It's hard to get the write access to is_scheduled in this case.
>
> If we add another member bitmap in mptcp_sched_data like this:
>
> struct mptcp_sched_subflow {
> 	struct mptcp_subflow_context *context;
> 	bool is_scheduled;
> };
>
> struct mptcp_sched_data {
> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
> 	const struct mptcp_sock *msk;
> 	bool reinject;
> 	unsigned bitmap;
> };
>
> It's easy to calculate the offset in bpf_mptcp_sched_btf_struct_access():
>
> 	switch (off) {
> 	case offsetof(struct mptcp_sched_data, bitmap):
> 		end = offsetofend(struct mptcp_sched_data, bitmap);
> 		break;
>
> But it's hard to calculate the offsets of is_scheduled, we need to have
> write access for 8 different offsets.
>
> We may calculate them like this:
>
> data->contexts[0].is_scheduled	offset = 0 + sizeof(struct mptcp_subflow_context *)
> data->contexts[1].is_scheduled  offset = 1 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
> data->contexts[2].is_scheduled  ...
> data->contexts[3].is_scheduled  ...
> data->contexts[4].is_scheduled  ...
> data->contexts[5].is_scheduled  ...
> data->contexts[6].is_scheduled  ...
> data->contexts[7].is_scheduled  offset = 7 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
>
> But it doesn't work. I haven't found a solution yet.
>
> Maybe we also need to consider the actual number of subflows, which makes
> it more complicated.
>
>
> If we make the array read only, just write the bitmap member or return a
> bitmap, we can avoid dealing with these complex offsets.
>
>
> Anyway, I will continue to solve this write access issue, but I also want
> to hear your opinion.


I think using a loop to evaluate possible offsets within the array is not 
too complex.


Going back to this layout:

struct mptcp_sched_data {
 	const struct mptcp_sock *msk;
 	bool reinject;
 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
};

The offset limits of is_scheduled within each mptcp_sched_subflow are:

nested_offset = offsetof(struct mptcp_sched_subflow, is_scheduled);
nested_offset_end = offsetofend(struct mptcp_sched_subflow, is_scheduled);

and the starting offset of each element in the subflows[] array is:

static size_t subflow_offset(int i)
{
 	return offsetof(struct mptcp_sched_data, subflows) + i * sizeof(struct mptcp_sched_subflow);
}


Then I think the offset 'off' can be checked for write access in a loop:

for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
 	size_t soff = subflow_offset(i);
 	if (off == soff + nested_offset && off + size <= soff + nested_offset_end)
 		return NOT_INIT; /* offsets match up with is_scheduled */
}


--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 3ceb721e6489..0ef805c489ab 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -91,8 +91,19 @@  void mptcp_release_sched(struct mptcp_sock *msk)
 static int mptcp_sched_data_init(struct mptcp_sock *msk,
 				 struct mptcp_sched_data *data)
 {
-	data->sock = NULL;
-	data->call_again = 0;
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		if (i == MPTCP_SUBFLOWS_MAX) {
+			pr_warn_once("too many subflows");
+			break;
+		}
+		data->contexts[i++] = subflow;
+	}
+
+	for (; i < MPTCP_SUBFLOWS_MAX; i++)
+		data->contexts[i] = NULL;
 
 	return 0;
 }
@@ -100,6 +111,9 @@  static int mptcp_sched_data_init(struct mptcp_sock *msk,
 struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	unsigned long bitmap;
+	int i;
 
 	sock_owned_by_me((struct sock *)msk);
 
@@ -114,15 +128,25 @@  struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 		return mptcp_subflow_get_send(msk);
 
 	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, false, &data);
+	bitmap = msk->sched->get_subflow(msk, false, &data);
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (test_bit(i, &bitmap) && data.contexts[i]) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
+
+	return ssk;
 }
 
 struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	unsigned long bitmap;
+	int i;
 
 	sock_owned_by_me((const struct sock *)msk);
 
@@ -134,8 +158,15 @@  struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 		return mptcp_subflow_get_retrans(msk);
 
 	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, true, &data);
+	bitmap = msk->sched->get_subflow(msk, true, &data);
+
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (test_bit(i, &bitmap) && data.contexts[i]) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	return ssk;
 }