Message ID | 20220302195622.3483941-1-kafai@fb.com (mailing list archive) |
---|---|
State | Accepted |
Commit | cd14e9b7b8d312dfbf75ce1f78552902e51b9045 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Preserve mono delivery time (EDT) in skb->tstamp | expand |
On Wed, Mar 2, 2022 at 11:56 AM Martin KaFai Lau <kafai@fb.com> wrote: > > The previous patches handled the delivery_time in the ingress path > before the routing decision is made. This patch can postpone clearing > delivery_time in a skb until knowing it is delivered locally and also > set the (rcv) timestamp if needed. This patch moves the > skb_clear_delivery_time() from dev.c to ip_local_deliver_finish() > and ip6_input_finish(). > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > --- > net/core/dev.c | 8 ++------ > net/ipv4/ip_input.c | 1 + > net/ipv6/ip6_input.c | 1 + > 3 files changed, 4 insertions(+), 6 deletions(-) > > diff --git a/net/core/dev.c b/net/core/dev.c > index 0fc02cf32476..3ff686cc8c84 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -5193,10 +5193,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > goto out; > } > > - if (skb_skip_tc_classify(skb)) { > - skb_clear_delivery_time(skb); > + if (skb_skip_tc_classify(skb)) > goto skip_classify; > - } > > if (pfmemalloc) > goto skip_taps; > @@ -5225,14 +5223,12 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > goto another_round; > if (!skb) > goto out; > - skb_clear_delivery_time(skb); > > nf_skip_egress(skb, false); > if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) > goto out; > - } else > + } > #endif > - skb_clear_delivery_time(skb); > skb_reset_redirect(skb); > skip_classify: > if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c > index d94f9f7e60c3..95f7bb052784 100644 > --- a/net/ipv4/ip_input.c > +++ b/net/ipv4/ip_input.c > @@ -226,6 +226,7 @@ void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) > > static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > { > + skb_clear_delivery_time(skb); > __skb_pull(skb, skb_network_header_len(skb)); > > rcu_read_lock(); > diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c > index d4b1e2c5aa76..5b5ea35635f9 100644 > --- a/net/ipv6/ip6_input.c > +++ b/net/ipv6/ip6_input.c > @@ -459,6 +459,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, > > static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > { > + skb_clear_delivery_time(skb); > rcu_read_lock(); > ip6_protocol_deliver_rcu(net, skb, 0, false); > rcu_read_unlock(); > -- > 2.30.2 > It is not clear to me why we need to clear tstamp if packet is locally delivered ? TCP stack is using tstamp for incoming packets (look for TCP_SKB_CB(skb)->has_rxtstamp)
On Wed, Mar 02, 2022 at 12:30:14PM -0800, Eric Dumazet wrote: > On Wed, Mar 2, 2022 at 11:56 AM Martin KaFai Lau <kafai@fb.com> wrote: > > > > The previous patches handled the delivery_time in the ingress path > > before the routing decision is made. This patch can postpone clearing > > delivery_time in a skb until knowing it is delivered locally and also > > set the (rcv) timestamp if needed. This patch moves the > > skb_clear_delivery_time() from dev.c to ip_local_deliver_finish() > > and ip6_input_finish(). > > > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > > --- > > net/core/dev.c | 8 ++------ > > net/ipv4/ip_input.c | 1 + > > net/ipv6/ip6_input.c | 1 + > > 3 files changed, 4 insertions(+), 6 deletions(-) > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > index 0fc02cf32476..3ff686cc8c84 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -5193,10 +5193,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > goto out; > > } > > > > - if (skb_skip_tc_classify(skb)) { > > - skb_clear_delivery_time(skb); > > + if (skb_skip_tc_classify(skb)) > > goto skip_classify; > > - } > > > > if (pfmemalloc) > > goto skip_taps; > > @@ -5225,14 +5223,12 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > goto another_round; > > if (!skb) > > goto out; > > - skb_clear_delivery_time(skb); > > > > nf_skip_egress(skb, false); > > if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) > > goto out; > > - } else > > + } > > #endif > > - skb_clear_delivery_time(skb); > > skb_reset_redirect(skb); > > skip_classify: > > if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) > > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c > > index d94f9f7e60c3..95f7bb052784 100644 > > --- a/net/ipv4/ip_input.c > > +++ b/net/ipv4/ip_input.c > > @@ -226,6 +226,7 @@ void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) > > > > static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > { > > + skb_clear_delivery_time(skb); > > __skb_pull(skb, skb_network_header_len(skb)); > > > > rcu_read_lock(); > > diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c > > index d4b1e2c5aa76..5b5ea35635f9 100644 > > --- a/net/ipv6/ip6_input.c > > +++ b/net/ipv6/ip6_input.c > > @@ -459,6 +459,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, > > > > static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > { > > + skb_clear_delivery_time(skb); > > rcu_read_lock(); > > ip6_protocol_deliver_rcu(net, skb, 0, false); > > rcu_read_unlock(); > > -- > > 2.30.2 > > > > It is not clear to me why we need to clear tstamp if packet is locally > delivered ? It does not clear the rx tstamp in skb->tstamp. It only clears the EDT in skb->tstamp when the skb is transmitted out of a local tcp_sock and then loop back from egress to ingress through virtual interface like veth. > > TCP stack is using tstamp for incoming packets (look for > TCP_SKB_CB(skb)->has_rxtstamp) skb_clear_delivery_time() will put ktime_get_real() back to skb->tstamp so that the receiving tcp_sock can use it.
On Wed, Mar 2, 2022 at 2:34 PM Martin KaFai Lau <kafai@fb.com> wrote: > > On Wed, Mar 02, 2022 at 12:30:14PM -0800, Eric Dumazet wrote: > > On Wed, Mar 2, 2022 at 11:56 AM Martin KaFai Lau <kafai@fb.com> wrote: > > > > > > The previous patches handled the delivery_time in the ingress path > > > before the routing decision is made. This patch can postpone clearing > > > delivery_time in a skb until knowing it is delivered locally and also > > > set the (rcv) timestamp if needed. This patch moves the > > > skb_clear_delivery_time() from dev.c to ip_local_deliver_finish() > > > and ip6_input_finish(). > > > > > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > > > --- > > > net/core/dev.c | 8 ++------ > > > net/ipv4/ip_input.c | 1 + > > > net/ipv6/ip6_input.c | 1 + > > > 3 files changed, 4 insertions(+), 6 deletions(-) > > > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > > index 0fc02cf32476..3ff686cc8c84 100644 > > > --- a/net/core/dev.c > > > +++ b/net/core/dev.c > > > @@ -5193,10 +5193,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > > goto out; > > > } > > > > > > - if (skb_skip_tc_classify(skb)) { > > > - skb_clear_delivery_time(skb); > > > + if (skb_skip_tc_classify(skb)) > > > goto skip_classify; > > > - } > > > > > > if (pfmemalloc) > > > goto skip_taps; > > > @@ -5225,14 +5223,12 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > > goto another_round; > > > if (!skb) > > > goto out; > > > - skb_clear_delivery_time(skb); > > > > > > nf_skip_egress(skb, false); > > > if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) > > > goto out; > > > - } else > > > + } > > > #endif > > > - skb_clear_delivery_time(skb); > > > skb_reset_redirect(skb); > > > skip_classify: > > > if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) > > > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c > > > index d94f9f7e60c3..95f7bb052784 100644 > > > --- a/net/ipv4/ip_input.c > > > +++ b/net/ipv4/ip_input.c > > > @@ -226,6 +226,7 @@ void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) > > > > > > static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > > { > > > + skb_clear_delivery_time(skb); > > > __skb_pull(skb, skb_network_header_len(skb)); > > > > > > rcu_read_lock(); > > > diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c > > > index d4b1e2c5aa76..5b5ea35635f9 100644 > > > --- a/net/ipv6/ip6_input.c > > > +++ b/net/ipv6/ip6_input.c > > > @@ -459,6 +459,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, > > > > > > static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > > { > > > + skb_clear_delivery_time(skb); > > > rcu_read_lock(); > > > ip6_protocol_deliver_rcu(net, skb, 0, false); > > > rcu_read_unlock(); > > > -- > > > 2.30.2 > > > > > > > It is not clear to me why we need to clear tstamp if packet is locally > > delivered ? > It does not clear the rx tstamp in skb->tstamp. > > It only clears the EDT in skb->tstamp when the skb > is transmitted out of a local tcp_sock and then loop back from egress > to ingress through virtual interface like veth. > > > > > TCP stack is using tstamp for incoming packets (look for > > TCP_SKB_CB(skb)->has_rxtstamp) > skb_clear_delivery_time() will put ktime_get_real() back to skb->tstamp > so that the receiving tcp_sock can use it. Oh... I had to look at +static inline void skb_clear_delivery_time(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) { + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = ktime_get_real(); + else + skb->tstamp = 0; + } +} Name was a bit confusing :) And it seems you have a big opportunity to not call ktime_get_real() when skb->sk is known at this point (early demux) because few sockets actually enable timestamping ?
On Wed, Mar 02, 2022 at 03:41:59PM -0800, Eric Dumazet wrote: > On Wed, Mar 2, 2022 at 2:34 PM Martin KaFai Lau <kafai@fb.com> wrote: > > > > On Wed, Mar 02, 2022 at 12:30:14PM -0800, Eric Dumazet wrote: > > > On Wed, Mar 2, 2022 at 11:56 AM Martin KaFai Lau <kafai@fb.com> wrote: > > > > > > > > The previous patches handled the delivery_time in the ingress path > > > > before the routing decision is made. This patch can postpone clearing > > > > delivery_time in a skb until knowing it is delivered locally and also > > > > set the (rcv) timestamp if needed. This patch moves the > > > > skb_clear_delivery_time() from dev.c to ip_local_deliver_finish() > > > > and ip6_input_finish(). > > > > > > > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > > > > --- > > > > net/core/dev.c | 8 ++------ > > > > net/ipv4/ip_input.c | 1 + > > > > net/ipv6/ip6_input.c | 1 + > > > > 3 files changed, 4 insertions(+), 6 deletions(-) > > > > > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > > > index 0fc02cf32476..3ff686cc8c84 100644 > > > > --- a/net/core/dev.c > > > > +++ b/net/core/dev.c > > > > @@ -5193,10 +5193,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > > > goto out; > > > > } > > > > > > > > - if (skb_skip_tc_classify(skb)) { > > > > - skb_clear_delivery_time(skb); > > > > + if (skb_skip_tc_classify(skb)) > > > > goto skip_classify; > > > > - } > > > > > > > > if (pfmemalloc) > > > > goto skip_taps; > > > > @@ -5225,14 +5223,12 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, > > > > goto another_round; > > > > if (!skb) > > > > goto out; > > > > - skb_clear_delivery_time(skb); > > > > > > > > nf_skip_egress(skb, false); > > > > if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) > > > > goto out; > > > > - } else > > > > + } > > > > #endif > > > > - skb_clear_delivery_time(skb); > > > > skb_reset_redirect(skb); > > > > skip_classify: > > > > if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) > > > > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c > > > > index d94f9f7e60c3..95f7bb052784 100644 > > > > --- a/net/ipv4/ip_input.c > > > > +++ b/net/ipv4/ip_input.c > > > > @@ -226,6 +226,7 @@ void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) > > > > > > > > static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > > > { > > > > + skb_clear_delivery_time(skb); > > > > __skb_pull(skb, skb_network_header_len(skb)); > > > > > > > > rcu_read_lock(); > > > > diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c > > > > index d4b1e2c5aa76..5b5ea35635f9 100644 > > > > --- a/net/ipv6/ip6_input.c > > > > +++ b/net/ipv6/ip6_input.c > > > > @@ -459,6 +459,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, > > > > > > > > static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) > > > > { > > > > + skb_clear_delivery_time(skb); > > > > rcu_read_lock(); > > > > ip6_protocol_deliver_rcu(net, skb, 0, false); > > > > rcu_read_unlock(); > > > > -- > > > > 2.30.2 > > > > > > > > > > It is not clear to me why we need to clear tstamp if packet is locally > > > delivered ? > > It does not clear the rx tstamp in skb->tstamp. > > > > It only clears the EDT in skb->tstamp when the skb > > is transmitted out of a local tcp_sock and then loop back from egress > > to ingress through virtual interface like veth. > > > > > > > > TCP stack is using tstamp for incoming packets (look for > > > TCP_SKB_CB(skb)->has_rxtstamp) > > skb_clear_delivery_time() will put ktime_get_real() back to skb->tstamp > > so that the receiving tcp_sock can use it. > > > Oh... I had to look at > > +static inline void skb_clear_delivery_time(struct sk_buff *skb) > +{ > + if (skb->mono_delivery_time) { > + skb->mono_delivery_time = 0; > + if (static_branch_unlikely(&netstamp_needed_key)) > + skb->tstamp = ktime_get_real(); > + else > + skb->tstamp = 0; > + } > +} > > Name was a bit confusing :) A few names were attempted in the early version and then concluded on delivery_time. :p > > And it seems you have a big opportunity to not call ktime_get_real() > when skb->sk is known at this point (early demux) > because few sockets actually enable timestamping ? iiuc, you are suggesting to also check the skb->sk (if early demux) and check for SK_FLAGS_TIMESTAMP. Without checking skb->sk here, it should not be worse than the current ktime_get_real() done in dev.c where it also does not have sk available? netstamp_needed_key should have been enabled as long as there is one sk asked for it.
On Wed, Mar 2, 2022 at 4:20 PM Martin KaFai Lau <kafai@fb.com> wrote: > > On Wed, Mar 02, 2022 at 03:41:59PM -0800, Eric Dumazet wrote: > > Name was a bit confusing :) > A few names were attempted in the early version and > then concluded on delivery_time. :p > > > > > And it seems you have a big opportunity to not call ktime_get_real() > > when skb->sk is known at this point (early demux) > > because few sockets actually enable timestamping ? > iiuc, you are suggesting to also check the skb->sk (if early demux) > and check for SK_FLAGS_TIMESTAMP. > > Without checking skb->sk here, it should not be worse than the > current ktime_get_real() done in dev.c where it also does not have sk > available? netstamp_needed_key should have been enabled as > long as there is one sk asked for it. Yes, but typically there is often one active timestamp consumer, enabling the static key. This is a corner case anyway, not sure if hosts running VM would have slow ktime_get_real()
diff --git a/net/core/dev.c b/net/core/dev.c index 0fc02cf32476..3ff686cc8c84 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5193,10 +5193,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, goto out; } - if (skb_skip_tc_classify(skb)) { - skb_clear_delivery_time(skb); + if (skb_skip_tc_classify(skb)) goto skip_classify; - } if (pfmemalloc) goto skip_taps; @@ -5225,14 +5223,12 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, goto another_round; if (!skb) goto out; - skb_clear_delivery_time(skb); nf_skip_egress(skb, false); if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; - } else + } #endif - skb_clear_delivery_time(skb); skb_reset_redirect(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d94f9f7e60c3..95f7bb052784 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -226,6 +226,7 @@ void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_clear_delivery_time(skb); __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d4b1e2c5aa76..5b5ea35635f9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -459,6 +459,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_clear_delivery_time(skb); rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); rcu_read_unlock();
The previous patches handled the delivery_time in the ingress path before the routing decision is made. This patch can postpone clearing delivery_time in a skb until knowing it is delivered locally and also set the (rcv) timestamp if needed. This patch moves the skb_clear_delivery_time() from dev.c to ip_local_deliver_finish() and ip6_input_finish(). Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- net/core/dev.c | 8 ++------ net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 1 + 3 files changed, 4 insertions(+), 6 deletions(-)