Message ID | 20200921200356.8627-13-rpearson@hpe.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rdma_rxe: API extensions | expand |
On Tue, Sep 22, 2020 at 4:04 AM Bob Pearson <rpearsonhpe@gmail.com> wrote: > > This patch does the following > - Fix a bug in rxe_rcv. > The current code calls rxe_match_dgid which checks to see if the > destination ip address (dgid) matches one of the addresses in > the gid table. This is ok for unicast adfdresses but not mcast > addresses. Because of this all mcast packets were previously > dropped. 338 /* rxe_rcv is called from the interface driver */ 339 void rxe_rcv(struct sk_buff *skb) 340 { ... 365 err = hdr_check(pkt); <---In this function multicast packets are checked and taken as error. 366 if (unlikely(err)) 367 goto drop; ... Zhu Yanjun > - Fix a bug in rxe_rcv_mcast_pkt. > The current code is just wrong. It assumed that it could pass > the same skb to rxe_rcv_pkt changing the qp pointer as it went > when multiple QPs were attached to the same mcast address. In > fact each QP needs a separate clone of the skb which it will > delete later. > > Signed-off-by: Bob Pearson <rpearson@hpe.com> > --- > drivers/infiniband/sw/rxe/rxe_recv.c | 60 +++++++++++++++++----------- > 1 file changed, 36 insertions(+), 24 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c > index 50411b0069ba..bc86ebbd2c8c 100644 > --- a/drivers/infiniband/sw/rxe/rxe_recv.c > +++ b/drivers/infiniband/sw/rxe/rxe_recv.c > @@ -233,6 +233,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > struct rxe_mc_elem *mce; > struct rxe_qp *qp; > union ib_gid dgid; > + struct sk_buff *per_qp_skb; > + struct rxe_pkt_info *per_qp_pkt; > int err; > > if (skb->protocol == htons(ETH_P_IP)) > @@ -261,42 +263,37 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > if (err) > continue; > > - /* if *not* the last qp in the list > - * increase the users of the skb then post to the next qp > + /* for all but the last qp create a new clone of the > + * skb and pass to the qp. > + * This effectively reverts an earlier change > + * which did not work. The pkt struct is contained > + * in the skb so each time you changed pkt you also > + * changed all the earlier pkts as well. Caused a mess. > */ > if (mce->qp_list.next != &mcg->qp_list) > - skb_get(skb); > + per_qp_skb = skb_clone(skb, GFP_ATOMIC); > + else > + per_qp_skb = skb; > > - pkt->qp = qp; > + per_qp_pkt = SKB_TO_PKT(per_qp_skb); > + per_qp_pkt->qp = qp; > rxe_add_ref(qp); > - rxe_rcv_pkt(pkt, skb); > + rxe_rcv_pkt(per_qp_pkt, per_qp_skb); > } > > spin_unlock_bh(&mcg->mcg_lock); > - > rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ > + return; > > err1: > kfree_skb(skb); > + return; > } > > -static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) > +static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb, > + union ib_gid *pdgid) > { > - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); > const struct ib_gid_attr *gid_attr; > - union ib_gid dgid; > - union ib_gid *pdgid; > - > - if (pkt->mask & RXE_LOOPBACK_MASK) > - return 0; > - > - if (skb->protocol == htons(ETH_P_IP)) { > - ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, > - (struct in6_addr *)&dgid); > - pdgid = &dgid; > - } else { > - pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; > - } > > gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, > IB_GID_TYPE_ROCE_UDP_ENCAP, > @@ -314,17 +311,32 @@ void rxe_rcv(struct sk_buff *skb) > int err; > struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); > struct rxe_dev *rxe = pkt->rxe; > + union ib_gid dgid; > + union ib_gid *pdgid; > __be32 *icrcp; > u32 calc_icrc, pack_icrc; > + int is_mc; > > pkt->offset = 0; > > if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) > goto drop; > > - if (rxe_match_dgid(rxe, skb) < 0) { > - pr_warn_ratelimited("failed matching dgid\n"); > - goto drop; > + if (skb->protocol == htons(ETH_P_IP)) { > + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, > + (struct in6_addr *)&dgid); > + pdgid = &dgid; > + } else { > + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; > + } > + > + is_mc = rdma_is_multicast_addr((struct in6_addr *)pdgid); > + > + if (!(pkt->mask & RXE_LOOPBACK_MASK) && !is_mc) { > + if (rxe_match_dgid(rxe, skb, pdgid) < 0) { > + pr_warn_ratelimited("failed matching dgid\n"); > + goto drop; > + } > } > > pkt->opcode = bth_opcode(pkt); > -- > 2.25.1 >
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 50411b0069ba..bc86ebbd2c8c 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -233,6 +233,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) struct rxe_mc_elem *mce; struct rxe_qp *qp; union ib_gid dgid; + struct sk_buff *per_qp_skb; + struct rxe_pkt_info *per_qp_pkt; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -261,42 +263,37 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) if (err) continue; - /* if *not* the last qp in the list - * increase the users of the skb then post to the next qp + /* for all but the last qp create a new clone of the + * skb and pass to the qp. + * This effectively reverts an earlier change + * which did not work. The pkt struct is contained + * in the skb so each time you changed pkt you also + * changed all the earlier pkts as well. Caused a mess. */ if (mce->qp_list.next != &mcg->qp_list) - skb_get(skb); + per_qp_skb = skb_clone(skb, GFP_ATOMIC); + else + per_qp_skb = skb; - pkt->qp = qp; + per_qp_pkt = SKB_TO_PKT(per_qp_skb); + per_qp_pkt->qp = qp; rxe_add_ref(qp); - rxe_rcv_pkt(pkt, skb); + rxe_rcv_pkt(per_qp_pkt, per_qp_skb); } spin_unlock_bh(&mcg->mcg_lock); - rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + return; err1: kfree_skb(skb); + return; } -static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) +static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb, + union ib_gid *pdgid) { - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; - union ib_gid dgid; - union ib_gid *pdgid; - - if (pkt->mask & RXE_LOOPBACK_MASK) - return 0; - - if (skb->protocol == htons(ETH_P_IP)) { - ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, - (struct in6_addr *)&dgid); - pdgid = &dgid; - } else { - pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; - } gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, IB_GID_TYPE_ROCE_UDP_ENCAP, @@ -314,17 +311,32 @@ void rxe_rcv(struct sk_buff *skb) int err; struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); struct rxe_dev *rxe = pkt->rxe; + union ib_gid dgid; + union ib_gid *pdgid; __be32 *icrcp; u32 calc_icrc, pack_icrc; + int is_mc; pkt->offset = 0; if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (rxe_match_dgid(rxe, skb) < 0) { - pr_warn_ratelimited("failed matching dgid\n"); - goto drop; + if (skb->protocol == htons(ETH_P_IP)) { + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + pdgid = &dgid; + } else { + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; + } + + is_mc = rdma_is_multicast_addr((struct in6_addr *)pdgid); + + if (!(pkt->mask & RXE_LOOPBACK_MASK) && !is_mc) { + if (rxe_match_dgid(rxe, skb, pdgid) < 0) { + pr_warn_ratelimited("failed matching dgid\n"); + goto drop; + } } pkt->opcode = bth_opcode(pkt);
This patch does the following - Fix a bug in rxe_rcv. The current code calls rxe_match_dgid which checks to see if the destination ip address (dgid) matches one of the addresses in the gid table. This is ok for unicast adfdresses but not mcast addresses. Because of this all mcast packets were previously dropped. - Fix a bug in rxe_rcv_mcast_pkt. The current code is just wrong. It assumed that it could pass the same skb to rxe_rcv_pkt changing the qp pointer as it went when multiple QPs were attached to the same mcast address. In fact each QP needs a separate clone of the skb which it will delete later. Signed-off-by: Bob Pearson <rpearson@hpe.com> --- drivers/infiniband/sw/rxe/rxe_recv.c | 60 +++++++++++++++++----------- 1 file changed, 36 insertions(+), 24 deletions(-)