diff mbox series

[net-next,v3,2/3] net: sched: Make tc-related drop reason more flexible for remaining qdiscs

Message ID 20231205205030.3119672-3-victor@mojatatu.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: sched: Make tc-related drop reason more flexible for remaining qdiscs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2581 this patch: 2581
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1265 this patch: 1265
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2675 this patch: 2675
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 109 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Victor Nogueira Dec. 5, 2023, 8:50 p.m. UTC
Incrementing on Daniel's patch[1], make tc-related drop reason more
flexible for remaining qdiscs - that is, all qdiscs aside from clsact.
In essence, the drop reason will be set by cls_api and act_api in case
any error occurred in the data path. With that, we can give the user more
detailed information so that they can distinguish between a policy drop
or an error drop.

[1] https://lore.kernel.org/all/20231009092655.22025-1-daniel@iogearbox.net

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
 include/net/pkt_cls.h     | 16 ----------------
 include/net/pkt_sched.h   | 19 -------------------
 include/net/sch_generic.h | 31 +++++++++++++++++++++++++++++++
 net/core/dev.c            |  7 +++++--
 4 files changed, 36 insertions(+), 37 deletions(-)

Comments

Daniel Borkmann Dec. 5, 2023, 9:20 p.m. UTC | #1
On 12/5/23 9:50 PM, Victor Nogueira wrote:
> Incrementing on Daniel's patch[1], make tc-related drop reason more
> flexible for remaining qdiscs - that is, all qdiscs aside from clsact.
> In essence, the drop reason will be set by cls_api and act_api in case
> any error occurred in the data path. With that, we can give the user more
> detailed information so that they can distinguish between a policy drop
> or an error drop.
> 
> [1] https://lore.kernel.org/all/20231009092655.22025-1-daniel@iogearbox.net
> 
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Simon Horman Dec. 11, 2023, 1:38 p.m. UTC | #2
On Tue, Dec 05, 2023 at 05:50:29PM -0300, Victor Nogueira wrote:
> Incrementing on Daniel's patch[1], make tc-related drop reason more
> flexible for remaining qdiscs - that is, all qdiscs aside from clsact.
> In essence, the drop reason will be set by cls_api and act_api in case
> any error occurred in the data path. With that, we can give the user more
> detailed information so that they can distinguish between a policy drop
> or an error drop.
> 
> [1] https://lore.kernel.org/all/20231009092655.22025-1-daniel@iogearbox.net
> 
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>

Reviewed-by: Simon Horman <horms@kernel.org>
Jakub Kicinski Dec. 12, 2023, 2:25 a.m. UTC | #3
On Tue,  5 Dec 2023 17:50:29 -0300 Victor Nogueira wrote:
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 4b84b72ebae8..f38c928a34aa 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3753,6 +3753,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  
>  	qdisc_calculate_pkt_len(skb, q);
>  
> +	tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP);
> +
>  	if (q->flags & TCQ_F_NOLOCK) {
>  		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
>  		    qdisc_run_begin(q)) {
> @@ -3782,7 +3784,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  no_lock_out:
>  		if (unlikely(to_free))
>  			kfree_skb_list_reason(to_free,
> -					      SKB_DROP_REASON_QDISC_DROP);
> +					      tcf_get_drop_reason(to_free));
>  		return rc;
>  	}
>  
> @@ -3837,7 +3839,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  	}
>  	spin_unlock(root_lock);
>  	if (unlikely(to_free))
> -		kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
> +		kfree_skb_list_reason(to_free,
> +				      tcf_get_drop_reason(to_free));

You stuff the drop reason into every skb but then only use the one from
the head? Herm. __qdisc_drop() only uses the next pointer can't we
overload the prev pointer to carry the drop reason. That means only
storing it if we already plan to drop the packet.

BTW I lack TC knowledge but struct tc_skb_cb is even more clsact
specific today than tcf_result. And reserving space for drop reason
in a state structure seems odd. Maybe that's just me.
Jamal Hadi Salim Dec. 12, 2023, 4:27 p.m. UTC | #4
On Mon, Dec 11, 2023 at 9:25 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue,  5 Dec 2023 17:50:29 -0300 Victor Nogueira wrote:
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 4b84b72ebae8..f38c928a34aa 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -3753,6 +3753,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> >
> >       qdisc_calculate_pkt_len(skb, q);
> >
> > +     tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP);
> > +
> >       if (q->flags & TCQ_F_NOLOCK) {
> >               if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
> >                   qdisc_run_begin(q)) {
> > @@ -3782,7 +3784,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> >  no_lock_out:
> >               if (unlikely(to_free))
> >                       kfree_skb_list_reason(to_free,
> > -                                           SKB_DROP_REASON_QDISC_DROP);
> > +                                           tcf_get_drop_reason(to_free));
> >               return rc;
> >       }
> >
> > @@ -3837,7 +3839,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> >       }
> >       spin_unlock(root_lock);
> >       if (unlikely(to_free))
> > -             kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
> > +             kfree_skb_list_reason(to_free,
> > +                                   tcf_get_drop_reason(to_free));
>
> You stuff the drop reason into every skb but then only use the one from
> the head? Herm. __qdisc_drop() only uses the next pointer can't we
> overload the prev pointer to carry the drop reason. That means only
> storing it if we already plan to drop the packet.
>

So when we looked at this code there was some mystery. It wasnt clear
how to_free could have more than one skb.
According to: 520ac30f4551 Eric says:

"I measured a performance increase of up to 12 %, but this patch is a
prereq so that future batches in enqueue() can fly. "

I am not sure if the batch enqueue ever happened and if it didnt then
there's only one skb in that list... When 7faef0547f4c added the
reason code it assumed a single code for the "list" - so it felt safer
to leave it that way. I guess it will depend on if a list could exist
to rethink this. Eric?

> BTW I lack TC knowledge but struct tc_skb_cb is even more clsact
> specific today than tcf_result. And reserving space for drop reason
> in a state structure seems odd. Maybe that's just me.

It is not clsact main use - it is a scratchpad for all of tc:

struct qdisc_skb_cb {
        struct {
                unsigned int            pkt_len;
                u16                     slave_dev_queue_mapping;
                u16                     tc_classid;
        };
#define QDISC_CB_PRIV_LEN 20
        unsigned char           data[QDISC_CB_PRIV_LEN];
};

struct tc_skb_cb {
        struct qdisc_skb_cb qdisc_cb;

        u16 mru;
        u8 post_ct:1;
        u8 post_ct_snat:1;
        u8 post_ct_dnat:1;
        u16 zone; /* Only valid if post_ct = true */
};

cheers,
jamal
Eric Dumazet Dec. 12, 2023, 4:57 p.m. UTC | #5
On Tue, Dec 12, 2023 at 5:28 PM Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>

>
> So when we looked at this code there was some mystery. It wasnt clear
> how to_free could have more than one skb.

Some qdisc can free skbs at enqueue() time in  a batch for performance
reason (fq_codel_drop() is such an instance)

I agree that all skbs in this list have probably the same drop_reason ?
Jamal Hadi Salim Dec. 12, 2023, 5:16 p.m. UTC | #6
On Tue, Dec 12, 2023 at 11:57 AM 'Eric Dumazet' via kernel issues
<kernel@mojatatu.com> wrote:
>
> On Tue, Dec 12, 2023 at 5:28 PM Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> >
>
> >
> > So when we looked at this code there was some mystery. It wasnt clear
> > how to_free could have more than one skb.
>
> Some qdisc can free skbs at enqueue() time in  a batch for performance
> reason (fq_codel_drop() is such an instance)

Ok, makes more sense, should've caught that (there are others like taprio).

> I agree that all skbs in this list have probably the same drop_reason ?

It seems that way. We will review all the qdiscs to see if there's any
exceptions.
Thanks Eric.

cheers,
jamal
Jamal Hadi Salim Dec. 13, 2023, 6:36 p.m. UTC | #7
On Tue, Dec 12, 2023 at 12:17 PM Jamal Hadi Salim <hadi@mojatatu.com> wrote:
>
> On Tue, Dec 12, 2023 at 11:57 AM 'Eric Dumazet' via kernel issues
> <kernel@mojatatu.com> wrote:
> >
> > On Tue, Dec 12, 2023 at 5:28 PM Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> > >
> >
> > >
> > > So when we looked at this code there was some mystery. It wasnt clear
> > > how to_free could have more than one skb.
> >
> > Some qdisc can free skbs at enqueue() time in  a batch for performance
> > reason (fq_codel_drop() is such an instance)
>
> Ok, makes more sense, should've caught that (there are others like taprio).
>
> > I agree that all skbs in this list have probably the same drop_reason ?
>
> It seems that way. We will review all the qdiscs to see if there's any
> exceptions.

Putting this to rest:
Other than fq codel, the others that deal with multiple skbs due to
gso segments. So the conclusion is:  if we have a bunch in the list
then they all suffer the same fate. So a single reason for the list is
sufficient.

cheers,
jamal
Jakub Kicinski Dec. 13, 2023, 9:08 p.m. UTC | #8
On Wed, 13 Dec 2023 13:36:31 -0500 Jamal Hadi Salim wrote:
> Putting this to rest:
> Other than fq codel, the others that deal with multiple skbs due to
> gso segments. So the conclusion is:  if we have a bunch in the list
> then they all suffer the same fate. So a single reason for the list is
> sufficient.

Alright.

I'm still a bit confused about the cb, tho.

struct qdisc_skb_cb is the state struct.
But we put the drop reason in struct tc_skb_cb.
How does that work. Qdiscs will assume they own all of
qdisc_skb_cb::data ?

Maybe some documentation about the lifetimes of these things
would clarify things?
Jamal Hadi Salim Dec. 14, 2023, 3:31 p.m. UTC | #9
On Wed, Dec 13, 2023 at 4:08 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 13 Dec 2023 13:36:31 -0500 Jamal Hadi Salim wrote:
> > Putting this to rest:
> > Other than fq codel, the others that deal with multiple skbs due to
> > gso segments. So the conclusion is:  if we have a bunch in the list
> > then they all suffer the same fate. So a single reason for the list is
> > sufficient.
>
> Alright.
>
> I'm still a bit confused about the cb, tho.
>
> struct qdisc_skb_cb is the state struct.

Per packet state within tc though, no? Once it leaves tc whatever sits
in that space cant be trusted to be valid.
To answer your earlier question tcf_result is not available at the
qdisc level (when we call free_skb_list() but cb is and thats why we
used it)

> But we put the drop reason in struct tc_skb_cb.
> How does that work. Qdiscs will assume they own all of
> qdisc_skb_cb::data ?
>

Short answer, yes. Anyone can scribble over that. And multiple
consumers have a food fight going on - but it is expected behavior:
ebpf's skb->cb, cake, fq_codel etc - all use qdisc_skb_cb::data.
Out of the 48B in skb->cb qdisc_skb_cb redefined the first 28B and
left in qdisc_skb_cb::data as free-for-all space. I think,
unfortunately, that is now cast in stone.
Which still leaves us 20 bytes which is now being squatered by
tc_skb_cb where the drop reason was placed. Shit, i just noticed this
is not exclusive - seems like
drivers/net/amt.c is using this space - not sure why it is even using
tc space. I am wondering if we can make it use the 20B scratch pad.
+Cc author Taehee Yoo - it doesnt seem to conflict but strange that it
is considering qdisc_skb_cb

> Maybe some documentation about the lifetimes of these things
> would clarify things?

What text do you think makes sense and where should it go?

cheers,
jamal
Taehee Yoo Dec. 14, 2023, 5:18 p.m. UTC | #10
On 12/15/23 00:31, Jamal Hadi Salim wrote:

Hi Jamal,

 > On Wed, Dec 13, 2023 at 4:08 PM Jakub Kicinski <kuba@kernel.org> wrote:
 >>
 >> On Wed, 13 Dec 2023 13:36:31 -0500 Jamal Hadi Salim wrote:
 >>> Putting this to rest:
 >>> Other than fq codel, the others that deal with multiple skbs due to
 >>> gso segments. So the conclusion is: if we have a bunch in the list
 >>> then they all suffer the same fate. So a single reason for the list is
 >>> sufficient.
 >>
 >> Alright.
 >>
 >> I'm still a bit confused about the cb, tho.
 >>
 >> struct qdisc_skb_cb is the state struct.
 >
 > Per packet state within tc though, no? Once it leaves tc whatever sits
 > in that space cant be trusted to be valid.
 > To answer your earlier question tcf_result is not available at the
 > qdisc level (when we call free_skb_list() but cb is and thats why we
 > used it)
 >
 >> But we put the drop reason in struct tc_skb_cb.
 >> How does that work. Qdiscs will assume they own all of
 >> qdisc_skb_cb::data ?
 >>
 >
 > Short answer, yes. Anyone can scribble over that. And multiple
 > consumers have a food fight going on - but it is expected behavior:
 > ebpf's skb->cb, cake, fq_codel etc - all use qdisc_skb_cb::data.
 > Out of the 48B in skb->cb qdisc_skb_cb redefined the first 28B and
 > left in qdisc_skb_cb::data as free-for-all space. I think,
 > unfortunately, that is now cast in stone.
 > Which still leaves us 20 bytes which is now being squatered by
 > tc_skb_cb where the drop reason was placed. Shit, i just noticed this
 > is not exclusive - seems like
 > drivers/net/amt.c is using this space - not sure why it is even using
 > tc space. I am wondering if we can make it use the 20B scratch pad.
 > +Cc author Taehee Yoo - it doesnt seem to conflict but strange that it
 > is considering qdisc_skb_cb

The reason why amt considers qdisc_skb_cb is to not use CB area the TC 
is using.
When amt driver send igmp/mld packet, it stores tunnel data in CB before 
calling dev_queue_xmit().
Then, it uses that tunnel data from CB in the amt_dev_xmit().
So, amt CB area should not be used by TC, this is the reason why it 
considers qdisc_skb_cb size.
But It looks wrong, it should use tc_skb_cb instead of qdisc_skb_cb to 
fully avoid CB area of TC, right?

 >
 >> Maybe some documentation about the lifetimes of these things
 >> would clarify things?
 >
 > What text do you think makes sense and where should it go?
 >
 > cheers,
 > jamal
Jakub Kicinski Dec. 14, 2023, 6:01 p.m. UTC | #11
On Thu, 14 Dec 2023 10:31:24 -0500 Jamal Hadi Salim wrote:
> > I'm still a bit confused about the cb, tho.
> >
> > struct qdisc_skb_cb is the state struct.  
> 
> Per packet state within tc though, no? Once it leaves tc whatever sits
> in that space cant be trusted to be valid.
> To answer your earlier question tcf_result is not available at the
> qdisc level (when we call free_skb_list() but cb is and thats why we
> used it)
> 
> > But we put the drop reason in struct tc_skb_cb.
> > How does that work. Qdiscs will assume they own all of
> > qdisc_skb_cb::data ?
> >  
> 
> Short answer, yes. Anyone can scribble over that. And multiple
> consumers have a food fight going on - but it is expected behavior:
> ebpf's skb->cb, cake, fq_codel etc - all use qdisc_skb_cb::data.
> Out of the 48B in skb->cb qdisc_skb_cb redefined the first 28B and
> left in qdisc_skb_cb::data as free-for-all space. I think,
> unfortunately, that is now cast in stone.
> Which still leaves us 20 bytes which is now being squatered by
> tc_skb_cb where the drop reason was placed.

Okay I got it now, for some reason I thought the new fields in
struct tc_skb_cb overlay the data[] in struct qdisc_skb_cb...
Jamal Hadi Salim Dec. 15, 2023, 2:42 p.m. UTC | #12
On Thu, Dec 14, 2023 at 12:18 PM Taehee Yoo <ap420073@gmail.com> wrote:
>
> On 12/15/23 00:31, Jamal Hadi Salim wrote:
>
> Hi Jamal,
>
>  > On Wed, Dec 13, 2023 at 4:08 PM Jakub Kicinski <kuba@kernel.org> wrote:
>  >>
>  >> On Wed, 13 Dec 2023 13:36:31 -0500 Jamal Hadi Salim wrote:
>  >>> Putting this to rest:
>  >>> Other than fq codel, the others that deal with multiple skbs due to
>  >>> gso segments. So the conclusion is: if we have a bunch in the list
>  >>> then they all suffer the same fate. So a single reason for the list is
>  >>> sufficient.
>  >>
>  >> Alright.
>  >>
>  >> I'm still a bit confused about the cb, tho.
>  >>
>  >> struct qdisc_skb_cb is the state struct.
>  >
>  > Per packet state within tc though, no? Once it leaves tc whatever sits
>  > in that space cant be trusted to be valid.
>  > To answer your earlier question tcf_result is not available at the
>  > qdisc level (when we call free_skb_list() but cb is and thats why we
>  > used it)
>  >
>  >> But we put the drop reason in struct tc_skb_cb.
>  >> How does that work. Qdiscs will assume they own all of
>  >> qdisc_skb_cb::data ?
>  >>
>  >
>  > Short answer, yes. Anyone can scribble over that. And multiple
>  > consumers have a food fight going on - but it is expected behavior:
>  > ebpf's skb->cb, cake, fq_codel etc - all use qdisc_skb_cb::data.
>  > Out of the 48B in skb->cb qdisc_skb_cb redefined the first 28B and
>  > left in qdisc_skb_cb::data as free-for-all space. I think,
>  > unfortunately, that is now cast in stone.
>  > Which still leaves us 20 bytes which is now being squatered by
>  > tc_skb_cb where the drop reason was placed. Shit, i just noticed this
>  > is not exclusive - seems like
>  > drivers/net/amt.c is using this space - not sure why it is even using
>  > tc space. I am wondering if we can make it use the 20B scratch pad.
>  > +Cc author Taehee Yoo - it doesnt seem to conflict but strange that it
>  > is considering qdisc_skb_cb
>
> The reason why amt considers qdisc_skb_cb is to not use CB area the TC
> is using.
> When amt driver send igmp/mld packet, it stores tunnel data in CB before
> calling dev_queue_xmit().
> Then, it uses that tunnel data from CB in the amt_dev_xmit().
> So, amt CB area should not be used by TC, this is the reason why it
> considers qdisc_skb_cb size.
> But It looks wrong, it should use tc_skb_cb instead of qdisc_skb_cb to
> fully avoid CB area of TC, right?


Yeah, that doesnt seem safe if you are storing state expecting it to
be intact afterwards - tc will definitely overwrite parts of it today.
See this:

struct qdisc_skb_cb {
        struct {
                unsigned int            pkt_len;
                u16                     slave_dev_queue_mapping;
                u16                     tc_classid;
        };
#define QDISC_CB_PRIV_LEN 20
        unsigned char           data[QDISC_CB_PRIV_LEN];
};

struct tc_skb_cb {
        struct qdisc_skb_cb qdisc_cb;

        u16 mru;
        u8 post_ct:1;
        u8 post_ct_snat:1;
        u8 post_ct_dnat:1;
        u16 zone; /* Only valid if post_ct = true */
};

tc_skb_cb->mru etc are writting over the area you are using.

The rule is you cant trust skb->cb content after it goes out of your "domain".

cheers,
jamal

>  >
>  >> Maybe some documentation about the lifetimes of these things
>  >> would clarify things?
>  >
>  > What text do you think makes sense and where should it go?
>  >
>  > cheers,
>  > jamal
diff mbox series

Patch

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 761e4500cca0..f308e8268651 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -154,22 +154,6 @@  __cls_set_class(unsigned long *clp, unsigned long cl)
 	return xchg(clp, cl);
 }
 
-struct tc_skb_cb;
-
-static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb);
-
-static inline enum skb_drop_reason
-tcf_get_drop_reason(const struct sk_buff *skb)
-{
-	return tc_skb_cb(skb)->drop_reason;
-}
-
-static inline void tcf_set_drop_reason(const struct sk_buff *skb,
-				       enum skb_drop_reason reason)
-{
-	tc_skb_cb(skb)->drop_reason = reason;
-}
-
 static inline void
 __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
 {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9b559aa5c079..1e200d9a066d 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -275,25 +275,6 @@  static inline void skb_txtime_consumed(struct sk_buff *skb)
 	skb->tstamp = ktime_set(0, 0);
 }
 
-struct tc_skb_cb {
-	struct qdisc_skb_cb qdisc_cb;
-	u32 drop_reason;
-
-	u16 zone; /* Only valid if post_ct = true */
-	u16 mru;
-	u8 post_ct:1;
-	u8 post_ct_snat:1;
-	u8 post_ct_dnat:1;
-};
-
-static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
-{
-	struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
-
-	BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
-	return cb;
-}
-
 static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
 				       unsigned long cl,
 				       struct qdisc_walker *arg)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c499b56bb215..1d70c2c1572f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1036,6 +1036,37 @@  static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
 	return skb;
 }
 
+struct tc_skb_cb {
+	struct qdisc_skb_cb qdisc_cb;
+	u32 drop_reason;
+
+	u16 zone; /* Only valid if post_ct = true */
+	u16 mru;
+	u8 post_ct:1;
+	u8 post_ct_snat:1;
+	u8 post_ct_dnat:1;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+	struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+	BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+	return cb;
+}
+
+static inline enum skb_drop_reason
+tcf_get_drop_reason(const struct sk_buff *skb)
+{
+	return tc_skb_cb(skb)->drop_reason;
+}
+
+static inline void tcf_set_drop_reason(const struct sk_buff *skb,
+				       enum skb_drop_reason reason)
+{
+	tc_skb_cb(skb)->drop_reason = reason;
+}
+
 /* Instead of calling kfree_skb() while root qdisc lock is held,
  * queue the skb for future freeing at end of __dev_xmit_skb()
  */
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b84b72ebae8..f38c928a34aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3753,6 +3753,8 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 	qdisc_calculate_pkt_len(skb, q);
 
+	tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+
 	if (q->flags & TCQ_F_NOLOCK) {
 		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
 		    qdisc_run_begin(q)) {
@@ -3782,7 +3784,7 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 no_lock_out:
 		if (unlikely(to_free))
 			kfree_skb_list_reason(to_free,
-					      SKB_DROP_REASON_QDISC_DROP);
+					      tcf_get_drop_reason(to_free));
 		return rc;
 	}
 
@@ -3837,7 +3839,8 @@  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	}
 	spin_unlock(root_lock);
 	if (unlikely(to_free))
-		kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
+		kfree_skb_list_reason(to_free,
+				      tcf_get_drop_reason(to_free));
 	if (unlikely(contended))
 		spin_unlock(&q->busylock);
 	return rc;