[rdma-next,v1] IB/cma: Honor traffic class from lower netdevice for RoCE
diff mbox series

Message ID 20191015072058.17347-1-leon@kernel.org
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series
  • [rdma-next,v1] IB/cma: Honor traffic class from lower netdevice for RoCE
Related show

Commit Message

Leon Romanovsky Oct. 15, 2019, 7:20 a.m. UTC
From: Parav Pandit <parav@mellanox.com>

When macvlan netdevice is used for RoCE, consider the tos->prio->tc
mapping as SL using its lower netdevice.
1. If lower netdevice is VLAN netdevice, consider such VLAN netdevice
and its parent netdevice for mapping
2. If lower netdevice is not a VLAN netdevice, consider tc mapping
directly from such lower netdevice

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
Changelog:
v0->v1: https://lore.kernel.org/linux-rdma/20191002121959.17444-1-leon@kernel.org
 - Protect call to netdev_walk_all_lower_dev_rcu with rcu
----
 drivers/infiniband/core/cma.c | 61 +++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 9 deletions(-)

--
2.20.1

Comments

Jason Gunthorpe Oct. 22, 2019, 7:02 p.m. UTC | #1
On Tue, Oct 15, 2019 at 10:20:58AM +0300, Leon Romanovsky wrote:
> From: Parav Pandit <parav@mellanox.com>
> 
> When macvlan netdevice is used for RoCE, consider the tos->prio->tc
> mapping as SL using its lower netdevice.
> 1. If lower netdevice is VLAN netdevice, consider such VLAN netdevice
> and its parent netdevice for mapping
> 2. If lower netdevice is not a VLAN netdevice, consider tc mapping
> directly from such lower netdevice
> 
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
> Changelog:
> v0->v1: https://lore.kernel.org/linux-rdma/20191002121959.17444-1-leon@kernel.org
>  - Protect call to netdev_walk_all_lower_dev_rcu with rcu
> ----
>  drivers/infiniband/core/cma.c | 61 +++++++++++++++++++++++++++++------
>  1 file changed, 52 insertions(+), 9 deletions(-)

Applied to for-next, thanks

Jason

Patch
diff mbox series

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0e3cf3461999..c8566a423719 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2827,22 +2827,65 @@  static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
 	return 0;
 }

-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
 {
-	int prio;
 	struct net_device *dev;

-	prio = rt_tos2priority(tos);
-	dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+	dev = vlan_dev_real_dev(vlan_ndev);
 	if (dev->num_tc)
 		return netdev_get_prio_tc_map(dev, prio);

-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+	int input_prio;
+	int output_tc;
+	bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+	struct iboe_prio_tc_map *map = data;
+
+	if (is_vlan_dev(dev))
+		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+	else if (dev->num_tc)
+		map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+	else
+		map->output_tc = 0;
+	/* We are interested only in first level VLAN device, so always
+	 * return 1 to stop iterating over next level devices.
+	 */
+	map->found = true;
+	return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+	struct iboe_prio_tc_map prio_tc_map = {};
+	int prio = rt_tos2priority(tos);
+
+	/* If VLAN device, get it directly from the VLAN netdev */
 	if (is_vlan_dev(ndev))
-		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
-			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
-	return 0;
+		return get_vlan_ndev_tc(ndev, prio);
+
+	prio_tc_map.input_prio = prio;
+	rcu_read_lock();
+	netdev_walk_all_lower_dev_rcu(ndev,
+				      get_lower_vlan_dev_tc,
+				      &prio_tc_map);
+	rcu_read_unlock();
+	/* If map is found from lower device, use it; Otherwise
+	 * continue with the current netdevice to get priority to tc map.
+	 */
+	if (prio_tc_map.found)
+		return prio_tc_map.output_tc;
+	else if (ndev->num_tc)
+		return netdev_get_prio_tc_map(ndev, prio);
+	else
+		return 0;
 }

 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)