diff mbox series

[net-next,v2,1/3] net/sched: Introduce tc block netdev tracking infra

Message ID 20230819163515.2266246-2-victor@mojatatu.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net/sched: Introduce tc block ports tracking and use | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2742 this patch: 2742
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 1502 this patch: 1502
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2789 this patch: 2789
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Victor Nogueira Aug. 19, 2023, 4:35 p.m. UTC
The tc block is a collection of netdevs/ports which allow qdiscs to share
filter block instances (as opposed to the traditional tc filter per port).
Example:
$ tc qdisc add dev ens7 ingress block 22
$ tc qdisc add dev ens8 ingress block 22

Now we can add a filter using the block index:
$ tc filter add block 22 protocol ip pref 25 \
  flower dst_ip 192.168.0.0/16 action drop

Up to this point, the block is unaware of its ports. This patch fixes that
and makes the tc block ports available to the datapath as well as control
path on offloading.

Suggested-by: Jiri Pirko <jiri@nvidia.com>
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
 include/net/sch_generic.h |  4 ++
 net/sched/cls_api.c       |  1 +
 net/sched/sch_api.c       | 79 +++++++++++++++++++++++++++++++++++++--
 net/sched/sch_generic.c   | 34 ++++++++++++++++-
 4 files changed, 112 insertions(+), 6 deletions(-)

Comments

Vlad Buslov Aug. 21, 2023, 7:12 p.m. UTC | #1
On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> The tc block is a collection of netdevs/ports which allow qdiscs to share
> filter block instances (as opposed to the traditional tc filter per port).
> Example:
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
>
> Now we can add a filter using the block index:
> $ tc filter add block 22 protocol ip pref 25 \
>   flower dst_ip 192.168.0.0/16 action drop
>
> Up to this point, the block is unaware of its ports. This patch fixes that
> and makes the tc block ports available to the datapath as well as control
> path on offloading.
>
> Suggested-by: Jiri Pirko <jiri@nvidia.com>
> Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> ---
>  include/net/sch_generic.h |  4 ++
>  net/sched/cls_api.c       |  1 +
>  net/sched/sch_api.c       | 79 +++++++++++++++++++++++++++++++++++++--
>  net/sched/sch_generic.c   | 34 ++++++++++++++++-
>  4 files changed, 112 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index e92f73bb3198..824a0ecb5afc 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -19,6 +19,7 @@
>  #include <net/gen_stats.h>
>  #include <net/rtnetlink.h>
>  #include <net/flow_offload.h>
> +#include <linux/xarray.h>
>  
>  struct Qdisc_ops;
>  struct qdisc_walker;
> @@ -126,6 +127,8 @@ struct Qdisc {
>  
>  	struct rcu_head		rcu;
>  	netdevice_tracker	dev_tracker;
> +	netdevice_tracker	in_block_tracker;
> +	netdevice_tracker	eg_block_tracker;
>  	/* private data */
>  	long privdata[] ____cacheline_aligned;
>  };
> @@ -458,6 +461,7 @@ struct tcf_chain {
>  };
>  
>  struct tcf_block {
> +	struct xarray ports; /* datapath accessible */
>  	/* Lock protects tcf_block and lifetime-management data of chains
>  	 * attached to the block (refcnt, action_refcnt, explicitly_created).
>  	 */
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index a193cc7b3241..a976792ef02f 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
>  	refcount_set(&block->refcnt, 1);
>  	block->net = net;
>  	block->index = block_index;
> +	xa_init(&block->ports);

Missing the matching call to xa_destroy() for this.

>  
>  	/* Don't store q pointer for blocks which are shared */
>  	if (!tcf_block_shared(block))
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index aa6b1fe65151..6c0c220cdb21 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
>  	return 0;
>  }
>  
> +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> +{
> +	if (tca[TCA_INGRESS_BLOCK])
> +		sch->ops->ingress_block_set(sch, 0);
> +
> +	if (tca[TCA_EGRESS_BLOCK])
> +		sch->ops->egress_block_set(sch, 0);
> +}
> +
> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> +			       struct nlattr **tca,
> +			       struct netlink_ext_ack *extack)
> +{
> +	const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> +	struct tcf_block *in_block = NULL;
> +	struct tcf_block *eg_block = NULL;
> +	unsigned long cl = 0;
> +	int err;
> +
> +	if (tca[TCA_INGRESS_BLOCK]) {
> +		/* works for both ingress and clsact */
> +		cl = TC_H_MIN_INGRESS;
> +		in_block = cl_ops->tcf_block(sch, cl, NULL);
> +		if (!in_block) {
> +			NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> +			return -EINVAL;
> +		}
> +
> +		err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> +		if (err) {
> +			NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> +			return err;
> +		}
> +
> +		netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> +	}
> +
> +	if (tca[TCA_EGRESS_BLOCK]) {
> +		cl = TC_H_MIN_EGRESS;
> +		eg_block = cl_ops->tcf_block(sch, cl, NULL);
> +		if (!eg_block) {
> +			NL_SET_ERR_MSG(extack, "Shared egress block missing");
> +			err = -EINVAL;
> +			goto err_out;
> +		}
> +
> +		err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> +		if (err) {
> +			netdev_put(dev, &sch->eg_block_tracker);
> +			NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> +			goto err_out;
> +		}
> +		netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> +	}
> +
> +	return 0;
> +err_out:
> +	if (in_block) {
> +		xa_erase(&in_block->ports, dev->ifindex);
> +		netdev_put(dev, &sch->in_block_tracker);
> +		NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> +	}
> +	return err;
> +}
> +
>  static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
>  				   struct netlink_ext_ack *extack)
>  {
> @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>  	sch = qdisc_alloc(dev_queue, ops, extack);
>  	if (IS_ERR(sch)) {
>  		err = PTR_ERR(sch);
> -		goto err_out2;
> +		goto err_out1;
>  	}
>  
>  	sch->parent = parent;
> @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>  			if (handle == 0) {
>  				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
>  				err = -ENOSPC;
> -				goto err_out3;
> +				goto err_out2;
>  			}
>  		}
>  		if (!netif_is_multiqueue(dev))
> @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>  
>  	err = qdisc_block_indexes_set(sch, tca, extack);
>  	if (err)
> -		goto err_out3;
> +		goto err_out2;
>  
>  	if (tca[TCA_STAB]) {
>  		stab = qdisc_get_stab(tca[TCA_STAB], extack);
> @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>  	qdisc_hash_add(sch, false);
>  	trace_qdisc_create(ops, dev, parent);
>  
> +	err = qdisc_block_add_dev(sch, dev, tca, extack);
> +	if (err)
> +		goto err_out4;
> +
>  	return sch;
>  
>  err_out4:
> @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>  		ops->destroy(sch);
>  	qdisc_put_stab(rtnl_dereference(sch->stab));
>  err_out3:
> +	qdisc_block_undo_set(sch, tca);

Is this a bugfix? This new call is for all sites that jump to
err_out{3|4} even though you only added new code to the end of the
function.

> +err_out2:
>  	netdev_put(dev, &sch->dev_tracker);
>  	qdisc_free(sch);
> -err_out2:
> +err_out1:
>  	module_put(ops->owner);
>  err_out:
>  	*errp = err;
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 5d7e23f4cc0e..0fb51fd6f01e 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
>  
>  static void __qdisc_destroy(struct Qdisc *qdisc)
>  {
> -	const struct Qdisc_ops  *ops = qdisc->ops;
> +	struct net_device *dev = qdisc_dev(qdisc);
> +	const struct Qdisc_ops *ops = qdisc->ops;
> +	const struct Qdisc_class_ops *cops;
> +	struct tcf_block *block;
> +	unsigned long cl;
> +	u32 block_index;
>  
>  #ifdef CONFIG_NET_SCHED
>  	qdisc_hash_del(qdisc);
> @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>  
>  	qdisc_reset(qdisc);
>  
> +	cops = ops->cl_ops;
> +	if (ops->ingress_block_get) {
> +		block_index = ops->ingress_block_get(qdisc);
> +		if (block_index) {
> +			cl = TC_H_MIN_INGRESS;
> +			block = cops->tcf_block(qdisc, cl, NULL);
> +			if (block) {
> +				if (xa_erase(&block->ports, dev->ifindex))
> +					netdev_put(dev, &qdisc->in_block_tracker);
> +			}
> +		}
> +	}
> +
> +	if (ops->egress_block_get) {
> +		block_index = ops->egress_block_get(qdisc);
> +		if (block_index) {
> +			cl = TC_H_MIN_EGRESS;
> +			block = cops->tcf_block(qdisc, cl, NULL);
> +			if (block) {
> +				if (xa_erase(&block->ports, dev->ifindex))
> +					netdev_put(dev, &qdisc->eg_block_tracker);
> +			}
> +		}
> +	}
> +
>  	if (ops->destroy)
>  		ops->destroy(qdisc);
>  
>  	module_put(ops->owner);
> -	netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> +	netdev_put(dev, &qdisc->dev_tracker);
>  
>  	trace_qdisc_destroy(qdisc);
Jamal Hadi Salim Aug. 24, 2023, 2:05 p.m. UTC | #2
On Mon, Aug 21, 2023 at 3:18 PM Vlad Buslov <vladbu@nvidia.com> wrote:
>
>
> On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> > The tc block is a collection of netdevs/ports which allow qdiscs to share
> > filter block instances (as opposed to the traditional tc filter per port).
> > Example:
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> >
> > Now we can add a filter using the block index:
> > $ tc filter add block 22 protocol ip pref 25 \
> >   flower dst_ip 192.168.0.0/16 action drop
> >
> > Up to this point, the block is unaware of its ports. This patch fixes that
> > and makes the tc block ports available to the datapath as well as control
> > path on offloading.
> >
> > Suggested-by: Jiri Pirko <jiri@nvidia.com>
> > Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> > ---
> >  include/net/sch_generic.h |  4 ++
> >  net/sched/cls_api.c       |  1 +
> >  net/sched/sch_api.c       | 79 +++++++++++++++++++++++++++++++++++++--
> >  net/sched/sch_generic.c   | 34 ++++++++++++++++-
> >  4 files changed, 112 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index e92f73bb3198..824a0ecb5afc 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -19,6 +19,7 @@
> >  #include <net/gen_stats.h>
> >  #include <net/rtnetlink.h>
> >  #include <net/flow_offload.h>
> > +#include <linux/xarray.h>
> >
> >  struct Qdisc_ops;
> >  struct qdisc_walker;
> > @@ -126,6 +127,8 @@ struct Qdisc {
> >
> >       struct rcu_head         rcu;
> >       netdevice_tracker       dev_tracker;
> > +     netdevice_tracker       in_block_tracker;
> > +     netdevice_tracker       eg_block_tracker;
> >       /* private data */
> >       long privdata[] ____cacheline_aligned;
> >  };
> > @@ -458,6 +461,7 @@ struct tcf_chain {
> >  };
> >
> >  struct tcf_block {
> > +     struct xarray ports; /* datapath accessible */
> >       /* Lock protects tcf_block and lifetime-management data of chains
> >        * attached to the block (refcnt, action_refcnt, explicitly_created).
> >        */
> > diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> > index a193cc7b3241..a976792ef02f 100644
> > --- a/net/sched/cls_api.c
> > +++ b/net/sched/cls_api.c
> > @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> >       refcount_set(&block->refcnt, 1);
> >       block->net = net;
> >       block->index = block_index;
> > +     xa_init(&block->ports);
>
> Missing the matching call to xa_destroy() for this.
>

Good catch - that should go in the block destroy path. I am not sure why
the kmemleak test didn't catch this.

> >
> >       /* Don't store q pointer for blocks which are shared */
> >       if (!tcf_block_shared(block))
> > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > index aa6b1fe65151..6c0c220cdb21 100644
> > --- a/net/sched/sch_api.c
> > +++ b/net/sched/sch_api.c
> > @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> >       return 0;
> >  }
> >
> > +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> > +{
> > +     if (tca[TCA_INGRESS_BLOCK])
> > +             sch->ops->ingress_block_set(sch, 0);
> > +
> > +     if (tca[TCA_EGRESS_BLOCK])
> > +             sch->ops->egress_block_set(sch, 0);
> > +}
> > +
> > +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> > +                            struct nlattr **tca,
> > +                            struct netlink_ext_ack *extack)
> > +{
> > +     const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> > +     struct tcf_block *in_block = NULL;
> > +     struct tcf_block *eg_block = NULL;
> > +     unsigned long cl = 0;
> > +     int err;
> > +
> > +     if (tca[TCA_INGRESS_BLOCK]) {
> > +             /* works for both ingress and clsact */
> > +             cl = TC_H_MIN_INGRESS;
> > +             in_block = cl_ops->tcf_block(sch, cl, NULL);
> > +             if (!in_block) {
> > +                     NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > +             if (err) {
> > +                     NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > +                     return err;
> > +             }
> > +
> > +             netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> > +     }
> > +
> > +     if (tca[TCA_EGRESS_BLOCK]) {
> > +             cl = TC_H_MIN_EGRESS;
> > +             eg_block = cl_ops->tcf_block(sch, cl, NULL);
> > +             if (!eg_block) {
> > +                     NL_SET_ERR_MSG(extack, "Shared egress block missing");
> > +                     err = -EINVAL;
> > +                     goto err_out;
> > +             }
> > +
> > +             err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > +             if (err) {
> > +                     netdev_put(dev, &sch->eg_block_tracker);
> > +                     NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> > +                     goto err_out;
> > +             }
> > +             netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> > +     }
> > +
> > +     return 0;
> > +err_out:
> > +     if (in_block) {
> > +             xa_erase(&in_block->ports, dev->ifindex);
> > +             netdev_put(dev, &sch->in_block_tracker);
> > +             NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > +     }
> > +     return err;
> > +}
> > +
> >  static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> >                                  struct netlink_ext_ack *extack)
> >  {
> > @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >       sch = qdisc_alloc(dev_queue, ops, extack);
> >       if (IS_ERR(sch)) {
> >               err = PTR_ERR(sch);
> > -             goto err_out2;
> > +             goto err_out1;
> >       }
> >
> >       sch->parent = parent;
> > @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >                       if (handle == 0) {
> >                               NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
> >                               err = -ENOSPC;
> > -                             goto err_out3;
> > +                             goto err_out2;
> >                       }
> >               }
> >               if (!netif_is_multiqueue(dev))
> > @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >
> >       err = qdisc_block_indexes_set(sch, tca, extack);
> >       if (err)
> > -             goto err_out3;
> > +             goto err_out2;
> >
> >       if (tca[TCA_STAB]) {
> >               stab = qdisc_get_stab(tca[TCA_STAB], extack);
> > @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >       qdisc_hash_add(sch, false);
> >       trace_qdisc_create(ops, dev, parent);
> >
> > +     err = qdisc_block_add_dev(sch, dev, tca, extack);
> > +     if (err)
> > +             goto err_out4;
> > +
> >       return sch;
> >
> >  err_out4:
> > @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >               ops->destroy(sch);
> >       qdisc_put_stab(rtnl_dereference(sch->stab));
> >  err_out3:
> > +     qdisc_block_undo_set(sch, tca);
>
> Is this a bugfix? This new call is for all sites that jump to
> err_out{3|4} even though you only added new code to the end of the
> function.

I guess it could be labelled a "bug fix" - the existing code did
not "rewind" the block ID setting when the TCA_INGRESS_BLOCK or
TCA_EGRESS_BLOCK attribute is present, the block ID gets set, and then
something fails later on in the code path.
Maybe we need to separate this out into a different patch, or even send
it as a bug fix.

cheers,
jamal
> > +err_out2:
> >       netdev_put(dev, &sch->dev_tracker);
> >       qdisc_free(sch);
> > -err_out2:
> > +err_out1:
> >       module_put(ops->owner);
> >  err_out:
> >       *errp = err;
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index 5d7e23f4cc0e..0fb51fd6f01e 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
> >
> >  static void __qdisc_destroy(struct Qdisc *qdisc)
> >  {
> > -     const struct Qdisc_ops  *ops = qdisc->ops;
> > +     struct net_device *dev = qdisc_dev(qdisc);
> > +     const struct Qdisc_ops *ops = qdisc->ops;
> > +     const struct Qdisc_class_ops *cops;
> > +     struct tcf_block *block;
> > +     unsigned long cl;
> > +     u32 block_index;
> >
> >  #ifdef CONFIG_NET_SCHED
> >       qdisc_hash_del(qdisc);
> > @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >
> >       qdisc_reset(qdisc);
> >
> > +     cops = ops->cl_ops;
> > +     if (ops->ingress_block_get) {
> > +             block_index = ops->ingress_block_get(qdisc);
> > +             if (block_index) {
> > +                     cl = TC_H_MIN_INGRESS;
> > +                     block = cops->tcf_block(qdisc, cl, NULL);
> > +                     if (block) {
> > +                             if (xa_erase(&block->ports, dev->ifindex))
> > +                                     netdev_put(dev, &qdisc->in_block_tracker);
> > +                     }
> > +             }
> > +     }
> > +
> > +     if (ops->egress_block_get) {
> > +             block_index = ops->egress_block_get(qdisc);
> > +             if (block_index) {
> > +                     cl = TC_H_MIN_EGRESS;
> > +                     block = cops->tcf_block(qdisc, cl, NULL);
> > +                     if (block) {
> > +                             if (xa_erase(&block->ports, dev->ifindex))
> > +                                     netdev_put(dev, &qdisc->eg_block_tracker);
> > +                     }
> > +             }
> > +     }
> > +
> >       if (ops->destroy)
> >               ops->destroy(qdisc);
> >
> >       module_put(ops->owner);
> > -     netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> > +     netdev_put(dev, &qdisc->dev_tracker);
> >
> >       trace_qdisc_destroy(qdisc);
>
diff mbox series

Patch

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e92f73bb3198..824a0ecb5afc 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@ 
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 #include <net/flow_offload.h>
+#include <linux/xarray.h>
 
 struct Qdisc_ops;
 struct qdisc_walker;
@@ -126,6 +127,8 @@  struct Qdisc {
 
 	struct rcu_head		rcu;
 	netdevice_tracker	dev_tracker;
+	netdevice_tracker	in_block_tracker;
+	netdevice_tracker	eg_block_tracker;
 	/* private data */
 	long privdata[] ____cacheline_aligned;
 };
@@ -458,6 +461,7 @@  struct tcf_chain {
 };
 
 struct tcf_block {
+	struct xarray ports; /* datapath accessible */
 	/* Lock protects tcf_block and lifetime-management data of chains
 	 * attached to the block (refcnt, action_refcnt, explicitly_created).
 	 */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a193cc7b3241..a976792ef02f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1003,6 +1003,7 @@  static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 	refcount_set(&block->refcnt, 1);
 	block->net = net;
 	block->index = block_index;
+	xa_init(&block->ports);
 
 	/* Don't store q pointer for blocks which are shared */
 	if (!tcf_block_shared(block))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..6c0c220cdb21 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1180,6 +1180,71 @@  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 	return 0;
 }
 
+static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
+{
+	if (tca[TCA_INGRESS_BLOCK])
+		sch->ops->ingress_block_set(sch, 0);
+
+	if (tca[TCA_EGRESS_BLOCK])
+		sch->ops->egress_block_set(sch, 0);
+}
+
+static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
+			       struct nlattr **tca,
+			       struct netlink_ext_ack *extack)
+{
+	const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
+	struct tcf_block *in_block = NULL;
+	struct tcf_block *eg_block = NULL;
+	unsigned long cl = 0;
+	int err;
+
+	if (tca[TCA_INGRESS_BLOCK]) {
+		/* works for both ingress and clsact */
+		cl = TC_H_MIN_INGRESS;
+		in_block = cl_ops->tcf_block(sch, cl, NULL);
+		if (!in_block) {
+			NL_SET_ERR_MSG(extack, "Shared ingress block missing");
+			return -EINVAL;
+		}
+
+		err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
+		if (err) {
+			NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
+			return err;
+		}
+
+		netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
+	}
+
+	if (tca[TCA_EGRESS_BLOCK]) {
+		cl = TC_H_MIN_EGRESS;
+		eg_block = cl_ops->tcf_block(sch, cl, NULL);
+		if (!eg_block) {
+			NL_SET_ERR_MSG(extack, "Shared egress block missing");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
+		if (err) {
+			netdev_put(dev, &sch->eg_block_tracker);
+			NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
+			goto err_out;
+		}
+		netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
+	}
+
+	return 0;
+err_out:
+	if (in_block) {
+		xa_erase(&in_block->ports, dev->ifindex);
+		netdev_put(dev, &sch->in_block_tracker);
+		NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
+	}
+	return err;
+}
+
 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
 				   struct netlink_ext_ack *extack)
 {
@@ -1270,7 +1335,7 @@  static struct Qdisc *qdisc_create(struct net_device *dev,
 	sch = qdisc_alloc(dev_queue, ops, extack);
 	if (IS_ERR(sch)) {
 		err = PTR_ERR(sch);
-		goto err_out2;
+		goto err_out1;
 	}
 
 	sch->parent = parent;
@@ -1289,7 +1354,7 @@  static struct Qdisc *qdisc_create(struct net_device *dev,
 			if (handle == 0) {
 				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
 				err = -ENOSPC;
-				goto err_out3;
+				goto err_out2;
 			}
 		}
 		if (!netif_is_multiqueue(dev))
@@ -1311,7 +1376,7 @@  static struct Qdisc *qdisc_create(struct net_device *dev,
 
 	err = qdisc_block_indexes_set(sch, tca, extack);
 	if (err)
-		goto err_out3;
+		goto err_out2;
 
 	if (tca[TCA_STAB]) {
 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
@@ -1350,6 +1415,10 @@  static struct Qdisc *qdisc_create(struct net_device *dev,
 	qdisc_hash_add(sch, false);
 	trace_qdisc_create(ops, dev, parent);
 
+	err = qdisc_block_add_dev(sch, dev, tca, extack);
+	if (err)
+		goto err_out4;
+
 	return sch;
 
 err_out4:
@@ -1360,9 +1429,11 @@  static struct Qdisc *qdisc_create(struct net_device *dev,
 		ops->destroy(sch);
 	qdisc_put_stab(rtnl_dereference(sch->stab));
 err_out3:
+	qdisc_block_undo_set(sch, tca);
+err_out2:
 	netdev_put(dev, &sch->dev_tracker);
 	qdisc_free(sch);
-err_out2:
+err_out1:
 	module_put(ops->owner);
 err_out:
 	*errp = err;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d7e23f4cc0e..0fb51fd6f01e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1048,7 +1048,12 @@  static void qdisc_free_cb(struct rcu_head *head)
 
 static void __qdisc_destroy(struct Qdisc *qdisc)
 {
-	const struct Qdisc_ops  *ops = qdisc->ops;
+	struct net_device *dev = qdisc_dev(qdisc);
+	const struct Qdisc_ops *ops = qdisc->ops;
+	const struct Qdisc_class_ops *cops;
+	struct tcf_block *block;
+	unsigned long cl;
+	u32 block_index;
 
 #ifdef CONFIG_NET_SCHED
 	qdisc_hash_del(qdisc);
@@ -1059,11 +1064,36 @@  static void __qdisc_destroy(struct Qdisc *qdisc)
 
 	qdisc_reset(qdisc);
 
+	cops = ops->cl_ops;
+	if (ops->ingress_block_get) {
+		block_index = ops->ingress_block_get(qdisc);
+		if (block_index) {
+			cl = TC_H_MIN_INGRESS;
+			block = cops->tcf_block(qdisc, cl, NULL);
+			if (block) {
+				if (xa_erase(&block->ports, dev->ifindex))
+					netdev_put(dev, &qdisc->in_block_tracker);
+			}
+		}
+	}
+
+	if (ops->egress_block_get) {
+		block_index = ops->egress_block_get(qdisc);
+		if (block_index) {
+			cl = TC_H_MIN_EGRESS;
+			block = cops->tcf_block(qdisc, cl, NULL);
+			if (block) {
+				if (xa_erase(&block->ports, dev->ifindex))
+					netdev_put(dev, &qdisc->eg_block_tracker);
+			}
+		}
+	}
+
 	if (ops->destroy)
 		ops->destroy(qdisc);
 
 	module_put(ops->owner);
-	netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+	netdev_put(dev, &qdisc->dev_tracker);
 
 	trace_qdisc_destroy(qdisc);