diff mbox series

[for-next,v2] RDMA/rxe: Cleanup rxe_init_packet()

Message ID 20220727163651.6967-1-rpearsonhpe@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [for-next,v2] RDMA/rxe: Cleanup rxe_init_packet() | expand

Commit Message

Bob Pearson July 27, 2022, 4:36 p.m. UTC
Currently rxe_init_packet() recomputes ndev from the address
vector but struct rxe_dev already holds a reference to ndev.

Cleanup rxe_init_packet to use the value of ndev in rxe and drop
an unneeded parameter paylen since it is already carried in the
packet info struct.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v2
  Per Haris Iqbal remove redundant 'ack->paylen = paylen'

 drivers/infiniband/sw/rxe/rxe_loc.h  |  2 +-
 drivers/infiniband/sw/rxe/rxe_net.c  | 49 +++++++++++-----------------
 drivers/infiniband/sw/rxe/rxe_req.c  |  2 +-
 drivers/infiniband/sw/rxe/rxe_resp.c |  4 +--
 4 files changed, 23 insertions(+), 34 deletions(-)

Comments

Jason Gunthorpe July 28, 2022, 4:08 p.m. UTC | #1
On Wed, Jul 27, 2022 at 11:36:52AM -0500, Bob Pearson wrote:
>  	unsigned int hdr_len;
>  	struct sk_buff *skb = NULL;
> -	struct net_device *ndev;
> -	const struct ib_gid_attr *attr;
> +	struct net_device *ndev = rxe->ndev;
>  	const int port_num = 1;
> -
> -	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
> -	if (IS_ERR(attr))
> -		return NULL;

An ib_device can have many netdevs associated with the gid indexes, eg
from VLANs or LAG. The core code creates these things

I think it is nonsense for rxe to work like this, and perhaps it
doesn't work at all, but until rxe blocks creation of these other gid
indexes I'm not sure it makes sense to delete this code..

Jason
Bob Pearson July 28, 2022, 6:04 p.m. UTC | #2
On 7/28/22 11:08, Jason Gunthorpe wrote:
> On Wed, Jul 27, 2022 at 11:36:52AM -0500, Bob Pearson wrote:
>>  	unsigned int hdr_len;
>>  	struct sk_buff *skb = NULL;
>> -	struct net_device *ndev;
>> -	const struct ib_gid_attr *attr;
>> +	struct net_device *ndev = rxe->ndev;
>>  	const int port_num = 1;
>> -
>> -	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
>> -	if (IS_ERR(attr))
>> -		return NULL;
> 
> An ib_device can have many netdevs associated with the gid indexes, eg
> from VLANs or LAG. The core code creates these things
> 
> I think it is nonsense for rxe to work like this, and perhaps it
> doesn't work at all, but until rxe blocks creation of these other gid
> indexes I'm not sure it makes sense to delete this code..
> 
> Jason


Somehow I had the vague impression that rxe didn't support vlans but
I just looked at the following commit

commit fd49ddaf7e266b5892d659eb99d9f77841e5b4c0

Author: Mohammad Heib <goody698@gmail.com>

Date:   Tue Aug 11 18:04:15 2020 +0300



    RDMA/rxe: prevent rxe creation on top of vlan interface

    

    Creating rxe device on top of vlan interface will create a non-functional

    device that has an empty gids table and can't be used for rdma cm

    communication.

    

    This is caused by the logic in

    enum_all_gids_of_dev_cb()/is_eth_port_of_netdev(), which only considers

    networks connected to "upper devices" of the configured network device,

    resulting in an empty set of gids for a vlan interface, and attempts to

    connect via this rdma device fail in cm_init_av_for_response because no

    gids can be resolved.

    

    Apparently, this behavior was implemented to fit the HW-RoCE devices that

    create RoCE device per port, therefore RXE must behave the same like

    HW-RoCE devices and create rxe device per real device only.

    

    In order to communicate via a vlan interface, the user must use the gid

    index of the vlan address instead of creating rxe over vlan.

    

    Link: https://lore.kernel.org/r/20200811150415.3693-1-goody698@gmail.com

    Signed-off-by: Mohammad Heib <goody698@gmail.com>

    Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>



which jibes with what you are saying. The immediate impact of this is that
rxe->ndev should probably not be used unless you know you want the physical device.

Bob
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index f9af30f582c6..7f98d296bc0d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -93,7 +93,7 @@  void rxe_mw_cleanup(struct rxe_pool_elem *elem);
 
 /* rxe_net.c */
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
-				int paylen, struct rxe_pkt_info *pkt);
+				struct rxe_pkt_info *pkt);
 int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 		struct sk_buff *skb);
 int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c53f4529f098..4a091f236dde 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -443,18 +443,22 @@  int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	return err;
 }
 
+/**
+ * rxe_init_packet - allocate and initialize a new skb
+ * @rxe: rxe device
+ * @av: remote address vector
+ * @pkt: packet info
+ *
+ * Returns: an skb on success else NULL
+ */
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
-				int paylen, struct rxe_pkt_info *pkt)
+				struct rxe_pkt_info *pkt)
 {
 	unsigned int hdr_len;
 	struct sk_buff *skb = NULL;
-	struct net_device *ndev;
-	const struct ib_gid_attr *attr;
+	struct net_device *ndev = rxe->ndev;
 	const int port_num = 1;
-
-	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
-	if (IS_ERR(attr))
-		return NULL;
+	int skb_size;
 
 	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
@@ -463,26 +467,13 @@  struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
 			sizeof(struct ipv6hdr);
 
-	rcu_read_lock();
-	ndev = rdma_read_gid_attr_ndev_rcu(attr);
-	if (IS_ERR(ndev)) {
-		rcu_read_unlock();
-		goto out;
-	}
-	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
-			GFP_ATOMIC);
-
-	if (unlikely(!skb)) {
-		rcu_read_unlock();
-		goto out;
-	}
-
-	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));
-
-	/* FIXME: hold reference to this netdev until life of this skb. */
-	skb->dev	= ndev;
-	rcu_read_unlock();
+	skb_size = LL_RESERVED_SPACE(ndev) + hdr_len + pkt->paylen;
+	skb = alloc_skb(skb_size, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
 
+	skb_reserve(skb, LL_RESERVED_SPACE(ndev) + hdr_len);
+	skb->dev = ndev;
 	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		skb->protocol = htons(ETH_P_IP);
 	else
@@ -490,11 +481,9 @@  struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 
 	pkt->rxe	= rxe;
 	pkt->port_num	= port_num;
-	pkt->hdr	= skb_put(skb, paylen);
-	pkt->mask	|= RXE_GRH_MASK;
+	pkt->hdr	= skb_put(skb, pkt->paylen);
+	pkt->mask      |= RXE_GRH_MASK;
 
-out:
-	rdma_put_gid_attr(attr);
 	return skb;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 49e8f54db6f5..e72db960d7ea 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -397,7 +397,7 @@  static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 	pkt->paylen = paylen;
 
 	/* init skb */
-	skb = rxe_init_packet(rxe, av, paylen, pkt);
+	skb = rxe_init_packet(rxe, av, pkt);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index b36ec5c4d5e0..19d0537278f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -670,15 +670,15 @@  static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	 */
 	pad = (-payload) & 0x3;
 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
+	ack->paylen = paylen;
 
-	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
+	skb = rxe_init_packet(rxe, &qp->pri_av, ack);
 	if (!skb)
 		return NULL;
 
 	ack->qp = qp;
 	ack->opcode = opcode;
 	ack->mask = rxe_opcode[opcode].mask;
-	ack->paylen = paylen;
 	ack->psn = psn;
 
 	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,