Message ID | 20231205002613.10219-1-rpearsonhpe@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RDMA/rxe: Make multicast work | expand |
Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Currently the rdma_rxe driver does not receive mcast packets at all. > > Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > the IP mcast address. This is required for mcast traffic to reach the > rxe driver when coming from an external source. > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > drivers/infiniband/sw/rxe/rxe_net.h | 1 + > drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > 4 files changed, 102 insertions(+), 21 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index 86cc2e18a7fd..54735d07cee5 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -19,38 +19,116 @@ > * mcast packets in the rxe receive path. > */ > > +#include <linux/igmp.h> > + > #include "rxe.h" > > -/** > - * rxe_mcast_add - add multicast address to rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > { > + struct in6_addr *addr6 = (struct in6_addr *)mgid; > + struct sock *sk = recv_sockets.sk6->sk; > unsigned char ll_addr[ETH_ALEN]; > + int err; > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + if (err && err != -EADDRINUSE) > + goto err_out; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_drop; > + > + return 0; > > - return dev_mc_add(rxe->ndev, ll_addr); > +err_drop: > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > +err_out: > + return err; > } > > -/** > - * rxe_mcast_del - delete multicast address from rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add(struct rxe_mcg *mcg) > { > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err; > + > + if (mcg->is_ipv6) > + return rxe_mcast_add6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + rtnl_lock(); > + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > + if (err && err != -EADDRINUSE) > + goto err_out; > + > + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_leave; > + > + return 0; > + > +err_leave: > + rtnl_lock(); > + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > +err_out: > + return err; > +} > + > +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > +{ > + struct sock *sk = recv_sockets.sk6->sk; > + unsigned char ll_addr[ETH_ALEN]; > + int err, err2; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > + (struct in6_addr *)mgid); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + > + return err ?: err2; > +} > + > +static int rxe_mcast_del(struct rxe_mcg *mcg) > +{ > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > + unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err, err2; > + > + if (mcg->is_ipv6) > + return rxe_mcast_del6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + rtnl_lock(); > + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > > - return dev_mc_del(rxe->ndev, ll_addr); > + return err ?: err2; > } > > /** > @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, > { > kref_init(&mcg->ref_cnt); > memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > INIT_LIST_HEAD(&mcg->qp_list); > mcg->rxe = rxe; > > @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > spin_unlock_bh(&rxe->mcg_lock); > > /* add mcast address outside of lock */ > - err = rxe_mcast_add(rxe, mgid); > + err = rxe_mcast_add(mcg); > if (!err) > return mcg; > > @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > static void rxe_destroy_mcg(struct rxe_mcg *mcg) > { > /* delete mcast address outside of lock */ > - rxe_mcast_del(mcg->rxe, &mcg->mgid); > + rxe_mcast_del(mcg); > > spin_lock_bh(&mcg->rxe->mcg_lock); > __rxe_destroy_mcg(mcg); > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 58c3f3759bf0..b481f8da2002 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -18,7 +18,7 @@ > #include "rxe_net.h" > #include "rxe_loc.h" > > -static struct rxe_recv_sockets recv_sockets; > +struct rxe_recv_sockets recv_sockets; > > static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > struct net_device *ndev, > diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h > index 45d80d00f86b..89cee7d5340f 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.h > +++ b/drivers/infiniband/sw/rxe/rxe_net.h > @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > struct socket *sk4; > struct socket *sk6; > }; > +extern struct rxe_recv_sockets recv_sockets; > > int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h > index ccb9d19ffe8a..7be9e6232dd9 100644 > --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > @@ -352,6 +352,7 @@ struct rxe_mcg { > atomic_t qp_num; > u32 qkey; > u16 pkey; > + bool is_ipv6; > }; > > struct rxe_mca {
在 2023/12/5 13:55, Zhu Yanjun 写道: > Add David S. Miller and David Ahern. > > They are the maintainers in netdev and very familiar with mcast. > > Zhu Yanjun > > 在 2023/12/5 8:26, Bob Pearson 写道: >> Currently the rdma_rxe driver does not receive mcast packets at all. >> >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister >> the IP mcast address. This is required for mcast traffic to reach the >> rxe driver when coming from an external source. >> >> Fixes: 8700e3e7c485 ("Soft RoCE driver") >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >> --- >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- >> drivers/infiniband/sw/rxe/rxe_net.h | 1 + >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + >> 4 files changed, 102 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c >> b/drivers/infiniband/sw/rxe/rxe_mcast.c >> index 86cc2e18a7fd..54735d07cee5 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c >> @@ -19,38 +19,116 @@ >> * mcast packets in the rxe receive path. >> */ >> +#include <linux/igmp.h> >> + >> #include "rxe.h" >> -/** >> - * rxe_mcast_add - add multicast address to rxe device >> - * @rxe: rxe device object >> - * @mgid: multicast address as a gid >> - * >> - * Returns 0 on success else an error >> - */ >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) >> { >> + struct in6_addr *addr6 = (struct in6_addr *)mgid; >> + struct sock *sk = recv_sockets.sk6->sk; >> unsigned char ll_addr[ETH_ALEN]; >> + int err; >> + >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. Please Jason or experts from netdev comment on this. Thanks, Zhu Yanjun >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> + if (err && err != -EADDRINUSE) >> + goto err_out; >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >> + err = dev_mc_add(rxe->ndev, ll_addr); >> + if (err) >> + goto err_drop; >> + >> + return 0; >> - return dev_mc_add(rxe->ndev, ll_addr); >> +err_drop: >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> +err_out: >> + return err; >> } >> -/** >> - * rxe_mcast_del - delete multicast address from rxe device >> - * @rxe: rxe device object >> - * @mgid: multicast address as a gid >> - * >> - * Returns 0 on success else an error >> - */ >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) >> +static int rxe_mcast_add(struct rxe_mcg *mcg) >> { >> + struct rxe_dev *rxe = mcg->rxe; >> + union ib_gid *mgid = &mcg->mgid; >> unsigned char ll_addr[ETH_ALEN]; >> + struct ip_mreqn imr = {}; >> + int err; >> + >> + if (mcg->is_ipv6) >> + return rxe_mcast_add6(rxe, mgid); >> + >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >> + imr.imr_ifindex = rxe->ndev->ifindex; >> + rtnl_lock(); >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> + if (err && err != -EADDRINUSE) >> + goto err_out; >> + >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >> + err = dev_mc_add(rxe->ndev, ll_addr); >> + if (err) >> + goto err_leave; >> + >> + return 0; >> + >> +err_leave: >> + rtnl_lock(); >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> +err_out: >> + return err; >> +} >> + >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) >> +{ >> + struct sock *sk = recv_sockets.sk6->sk; >> + unsigned char ll_addr[ETH_ALEN]; >> + int err, err2; >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >> + err = dev_mc_del(rxe->ndev, ll_addr); >> + >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, >> + (struct in6_addr *)mgid); >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> + >> + return err ?: err2; >> +} >> + >> +static int rxe_mcast_del(struct rxe_mcg *mcg) >> +{ >> + struct rxe_dev *rxe = mcg->rxe; >> + union ib_gid *mgid = &mcg->mgid; >> + unsigned char ll_addr[ETH_ALEN]; >> + struct ip_mreqn imr = {}; >> + int err, err2; >> + >> + if (mcg->is_ipv6) >> + return rxe_mcast_del6(rxe, mgid); >> + >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >> + imr.imr_ifindex = rxe->ndev->ifindex; >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >> + err = dev_mc_del(rxe->ndev, ll_addr); >> + >> + rtnl_lock(); >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> - return dev_mc_del(rxe->ndev, ll_addr); >> + return err ?: err2; >> } >> /** >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, >> union ib_gid *mgid, >> { >> kref_init(&mcg->ref_cnt); >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); >> INIT_LIST_HEAD(&mcg->qp_list); >> mcg->rxe = rxe; >> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct >> rxe_dev *rxe, union ib_gid *mgid) >> spin_unlock_bh(&rxe->mcg_lock); >> /* add mcast address outside of lock */ >> - err = rxe_mcast_add(rxe, mgid); >> + err = rxe_mcast_add(mcg); >> if (!err) >> return mcg; >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) >> static void rxe_destroy_mcg(struct rxe_mcg *mcg) >> { >> /* delete mcast address outside of lock */ >> - rxe_mcast_del(mcg->rxe, &mcg->mgid); >> + rxe_mcast_del(mcg); >> spin_lock_bh(&mcg->rxe->mcg_lock); >> __rxe_destroy_mcg(mcg); >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c >> b/drivers/infiniband/sw/rxe/rxe_net.c >> index 58c3f3759bf0..b481f8da2002 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_net.c >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c >> @@ -18,7 +18,7 @@ >> #include "rxe_net.h" >> #include "rxe_loc.h" >> -static struct rxe_recv_sockets recv_sockets; >> +struct rxe_recv_sockets recv_sockets; >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, >> struct net_device *ndev, >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h >> b/drivers/infiniband/sw/rxe/rxe_net.h >> index 45d80d00f86b..89cee7d5340f 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_net.h >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { >> struct socket *sk4; >> struct socket *sk6; >> }; >> +extern struct rxe_recv_sockets recv_sockets; >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h >> b/drivers/infiniband/sw/rxe/rxe_verbs.h >> index ccb9d19ffe8a..7be9e6232dd9 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h >> @@ -352,6 +352,7 @@ struct rxe_mcg { >> atomic_t qp_num; >> u32 qkey; >> u16 pkey; >> + bool is_ipv6; >> }; >> struct rxe_mca {
On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: > > > 在 2023/12/5 13:55, Zhu Yanjun 写道: > > Add David S. Miller and David Ahern. > > > > They are the maintainers in netdev and very familiar with mcast. > > > > Zhu Yanjun > > > > 在 2023/12/5 8:26, Bob Pearson 写道: > >> Currently the rdma_rxe driver does not receive mcast packets at all. > >> > >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > >> the IP mcast address. This is required for mcast traffic to reach the > >> rxe driver when coming from an external source. > >> > >> Fixes: 8700e3e7c485 ("Soft RoCE driver") > >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >> --- > >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > >> drivers/infiniband/sw/rxe/rxe_net.h | 1 + > >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > >> 4 files changed, 102 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> index 86cc2e18a7fd..54735d07cee5 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> @@ -19,38 +19,116 @@ > >> * mcast packets in the rxe receive path. > >> */ > >> +#include <linux/igmp.h> > >> + > >> #include "rxe.h" > >> -/** > >> - * rxe_mcast_add - add multicast address to rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > >> { > >> + struct in6_addr *addr6 = (struct in6_addr *)mgid; > >> + struct sock *sk = recv_sockets.sk6->sk; > >> unsigned char ll_addr[ETH_ALEN]; > >> + int err; > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > > > Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. ./net/ipv6/addrconf.c-2915- lock_sock(sk); ./net/ipv6/addrconf.c-2916- if (join) ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, ifindex, addr); ./net/ipv6/addrconf.c-2918- else ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, ifindex, addr); ./net/ipv6/addrconf.c-2920- release_sock(sk); Should be lock_sock? > > Please Jason or experts from netdev comment on this. > > Thanks, > > Zhu Yanjun > > > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_drop; > >> + > >> + return 0; > >> - return dev_mc_add(rxe->ndev, ll_addr); > >> +err_drop: > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> +err_out: > >> + return err; > >> } > >> -/** > >> - * rxe_mcast_del - delete multicast address from rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add(struct rxe_mcg *mcg) > >> { > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_add6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + rtnl_lock(); > >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> + > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_leave; > >> + > >> + return 0; > >> + > >> +err_leave: > >> + rtnl_lock(); > >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> +err_out: > >> + return err; > >> +} > >> + > >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > >> +{ > >> + struct sock *sk = recv_sockets.sk6->sk; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + int err, err2; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > >> + (struct in6_addr *)mgid); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + > >> + return err ?: err2; > >> +} > >> + > >> +static int rxe_mcast_del(struct rxe_mcg *mcg) > >> +{ > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err, err2; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_del6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + rtnl_lock(); > >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> - return dev_mc_del(rxe->ndev, ll_addr); > >> + return err ?: err2; > >> } > >> /** > >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, > >> union ib_gid *mgid, > >> { > >> kref_init(&mcg->ref_cnt); > >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > >> INIT_LIST_HEAD(&mcg->qp_list); > >> mcg->rxe = rxe; > >> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct > >> rxe_dev *rxe, union ib_gid *mgid) > >> spin_unlock_bh(&rxe->mcg_lock); > >> /* add mcast address outside of lock */ > >> - err = rxe_mcast_add(rxe, mgid); > >> + err = rxe_mcast_add(mcg); > >> if (!err) > >> return mcg; > >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > >> static void rxe_destroy_mcg(struct rxe_mcg *mcg) > >> { > >> /* delete mcast address outside of lock */ > >> - rxe_mcast_del(mcg->rxe, &mcg->mgid); > >> + rxe_mcast_del(mcg); > >> spin_lock_bh(&mcg->rxe->mcg_lock); > >> __rxe_destroy_mcg(mcg); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c > >> b/drivers/infiniband/sw/rxe/rxe_net.c > >> index 58c3f3759bf0..b481f8da2002 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c > >> @@ -18,7 +18,7 @@ > >> #include "rxe_net.h" > >> #include "rxe_loc.h" > >> -static struct rxe_recv_sockets recv_sockets; > >> +struct rxe_recv_sockets recv_sockets; > >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > >> struct net_device *ndev, > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h > >> b/drivers/infiniband/sw/rxe/rxe_net.h > >> index 45d80d00f86b..89cee7d5340f 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h > >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > >> struct socket *sk4; > >> struct socket *sk6; > >> }; > >> +extern struct rxe_recv_sockets recv_sockets; > >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> index ccb9d19ffe8a..7be9e6232dd9 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> @@ -352,6 +352,7 @@ struct rxe_mcg { > >> atomic_t qp_num; > >> u32 qkey; > >> u16 pkey; > >> + bool is_ipv6; > >> }; > >> struct rxe_mca { >
On 12/6/23 19:47, Rain River wrote: > On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: >> >> >> 在 2023/12/5 13:55, Zhu Yanjun 写道: >>> Add David S. Miller and David Ahern. >>> >>> They are the maintainers in netdev and very familiar with mcast. >>> >>> Zhu Yanjun >>> >>> 在 2023/12/5 8:26, Bob Pearson 写道: >>>> Currently the rdma_rxe driver does not receive mcast packets at all. >>>> >>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister >>>> the IP mcast address. This is required for mcast traffic to reach the >>>> rxe driver when coming from an external source. >>>> >>>> Fixes: 8700e3e7c485 ("Soft RoCE driver") >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >>>> --- >>>> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- >>>> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- >>>> drivers/infiniband/sw/rxe/rxe_net.h | 1 + >>>> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + >>>> 4 files changed, 102 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> index 86cc2e18a7fd..54735d07cee5 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> @@ -19,38 +19,116 @@ >>>> * mcast packets in the rxe receive path. >>>> */ >>>> +#include <linux/igmp.h> >>>> + >>>> #include "rxe.h" >>>> -/** >>>> - * rxe_mcast_add - add multicast address to rxe device >>>> - * @rxe: rxe device object >>>> - * @mgid: multicast address as a gid >>>> - * >>>> - * Returns 0 on success else an error >>>> - */ >>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) >>>> { >>>> + struct in6_addr *addr6 = (struct in6_addr *)mgid; >>>> + struct sock *sk = recv_sockets.sk6->sk; >>>> unsigned char ll_addr[ETH_ALEN]; >>>> + int err; >>>> + >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); >> >> >> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. > > ./net/ipv6/addrconf.c-2915- lock_sock(sk); > ./net/ipv6/addrconf.c-2916- if (join) > ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, > ifindex, addr); > ./net/ipv6/addrconf.c-2918- else > ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, > ifindex, addr); > ./net/ipv6/addrconf.c-2920- release_sock(sk); > > Should be lock_sock? It works as well as spin_lock_bh() in preventing the RCU splat and looks like the preferred way. I'll make this change. Bob > >> >> Please Jason or experts from netdev comment on this. >> >> Thanks, >> >> Zhu Yanjun >> >> >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> + if (err && err != -EADDRINUSE) >>>> + goto err_out; >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >>>> + err = dev_mc_add(rxe->ndev, ll_addr); >>>> + if (err) >>>> + goto err_drop; >>>> + >>>> + return 0; >>>> - return dev_mc_add(rxe->ndev, ll_addr); >>>> +err_drop: >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> +err_out: >>>> + return err; >>>> } >>>> -/** >>>> - * rxe_mcast_del - delete multicast address from rxe device >>>> - * @rxe: rxe device object >>>> - * @mgid: multicast address as a gid >>>> - * >>>> - * Returns 0 on success else an error >>>> - */ >>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +static int rxe_mcast_add(struct rxe_mcg *mcg) >>>> { >>>> + struct rxe_dev *rxe = mcg->rxe; >>>> + union ib_gid *mgid = &mcg->mgid; >>>> unsigned char ll_addr[ETH_ALEN]; >>>> + struct ip_mreqn imr = {}; >>>> + int err; >>>> + >>>> + if (mcg->is_ipv6) >>>> + return rxe_mcast_add6(rxe, mgid); >>>> + >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >>>> + imr.imr_ifindex = rxe->ndev->ifindex; >>>> + rtnl_lock(); >>>> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> + if (err && err != -EADDRINUSE) >>>> + goto err_out; >>>> + >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >>>> + err = dev_mc_add(rxe->ndev, ll_addr); >>>> + if (err) >>>> + goto err_leave; >>>> + >>>> + return 0; >>>> + >>>> +err_leave: >>>> + rtnl_lock(); >>>> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> +err_out: >>>> + return err; >>>> +} >>>> + >>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +{ >>>> + struct sock *sk = recv_sockets.sk6->sk; >>>> + unsigned char ll_addr[ETH_ALEN]; >>>> + int err, err2; >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >>>> + err = dev_mc_del(rxe->ndev, ll_addr); >>>> + >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, >>>> + (struct in6_addr *)mgid); >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> + >>>> + return err ?: err2; >>>> +} >>>> + >>>> +static int rxe_mcast_del(struct rxe_mcg *mcg) >>>> +{ >>>> + struct rxe_dev *rxe = mcg->rxe; >>>> + union ib_gid *mgid = &mcg->mgid; >>>> + unsigned char ll_addr[ETH_ALEN]; >>>> + struct ip_mreqn imr = {}; >>>> + int err, err2; >>>> + >>>> + if (mcg->is_ipv6) >>>> + return rxe_mcast_del6(rxe, mgid); >>>> + >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >>>> + imr.imr_ifindex = rxe->ndev->ifindex; >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >>>> + err = dev_mc_del(rxe->ndev, ll_addr); >>>> + >>>> + rtnl_lock(); >>>> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> - return dev_mc_del(rxe->ndev, ll_addr); >>>> + return err ?: err2; >>>> } >>>> /** >>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, >>>> union ib_gid *mgid, >>>> { >>>> kref_init(&mcg->ref_cnt); >>>> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); >>>> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); >>>> INIT_LIST_HEAD(&mcg->qp_list); >>>> mcg->rxe = rxe; >>>> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct >>>> rxe_dev *rxe, union ib_gid *mgid) >>>> spin_unlock_bh(&rxe->mcg_lock); >>>> /* add mcast address outside of lock */ >>>> - err = rxe_mcast_add(rxe, mgid); >>>> + err = rxe_mcast_add(mcg); >>>> if (!err) >>>> return mcg; >>>> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) >>>> static void rxe_destroy_mcg(struct rxe_mcg *mcg) >>>> { >>>> /* delete mcast address outside of lock */ >>>> - rxe_mcast_del(mcg->rxe, &mcg->mgid); >>>> + rxe_mcast_del(mcg); >>>> spin_lock_bh(&mcg->rxe->mcg_lock); >>>> __rxe_destroy_mcg(mcg); >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c >>>> b/drivers/infiniband/sw/rxe/rxe_net.c >>>> index 58c3f3759bf0..b481f8da2002 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c >>>> @@ -18,7 +18,7 @@ >>>> #include "rxe_net.h" >>>> #include "rxe_loc.h" >>>> -static struct rxe_recv_sockets recv_sockets; >>>> +struct rxe_recv_sockets recv_sockets; >>>> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, >>>> struct net_device *ndev, >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h >>>> b/drivers/infiniband/sw/rxe/rxe_net.h >>>> index 45d80d00f86b..89cee7d5340f 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h >>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { >>>> struct socket *sk4; >>>> struct socket *sk6; >>>> }; >>>> +extern struct rxe_recv_sockets recv_sockets; >>>> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> index ccb9d19ffe8a..7be9e6232dd9 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> @@ -352,6 +352,7 @@ struct rxe_mcg { >>>> atomic_t qp_num; >>>> u32 qkey; >>>> u16 pkey; >>>> + bool is_ipv6; >>>> }; >>>> struct rxe_mca { >>
On Fri, Dec 8, 2023 at 3:07 AM Bob Pearson <rpearsonhpe@gmail.com> wrote: > > > > On 12/6/23 19:47, Rain River wrote: > > On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: > >> > >> > >> 在 2023/12/5 13:55, Zhu Yanjun 写道: > >>> Add David S. Miller and David Ahern. > >>> > >>> They are the maintainers in netdev and very familiar with mcast. > >>> > >>> Zhu Yanjun > >>> > >>> 在 2023/12/5 8:26, Bob Pearson 写道: > >>>> Currently the rdma_rxe driver does not receive mcast packets at all. > >>>> > >>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > >>>> the IP mcast address. This is required for mcast traffic to reach the > >>>> rxe driver when coming from an external source. > >>>> > >>>> Fixes: 8700e3e7c485 ("Soft RoCE driver") > >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >>>> --- > >>>> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > >>>> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > >>>> drivers/infiniband/sw/rxe/rxe_net.h | 1 + > >>>> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > >>>> 4 files changed, 102 insertions(+), 21 deletions(-) > >>>> > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> index 86cc2e18a7fd..54735d07cee5 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> @@ -19,38 +19,116 @@ > >>>> * mcast packets in the rxe receive path. > >>>> */ > >>>> +#include <linux/igmp.h> > >>>> + > >>>> #include "rxe.h" > >>>> -/** > >>>> - * rxe_mcast_add - add multicast address to rxe device > >>>> - * @rxe: rxe device object > >>>> - * @mgid: multicast address as a gid > >>>> - * > >>>> - * Returns 0 on success else an error > >>>> - */ > >>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> { > >>>> + struct in6_addr *addr6 = (struct in6_addr *)mgid; > >>>> + struct sock *sk = recv_sockets.sk6->sk; > >>>> unsigned char ll_addr[ETH_ALEN]; > >>>> + int err; > >>>> + > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > >> > >> > >> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. > > > > ./net/ipv6/addrconf.c-2915- lock_sock(sk); > > ./net/ipv6/addrconf.c-2916- if (join) > > ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, > > ifindex, addr); > > ./net/ipv6/addrconf.c-2918- else > > ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, > > ifindex, addr); > > ./net/ipv6/addrconf.c-2920- release_sock(sk); > > > > Should be lock_sock? > > It works as well as spin_lock_bh() in preventing the RCU splat and > looks like the preferred way. I'll make this change. This is the implementation of lock_sock. lock_sock has not only spin_lock_bh, but also other protections for sk. You should use lock_sock, instead of spin_lock_bh. void lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. */ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sock_owned_by_user_nocheck(sk)) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); > > Bob > > > >> > >> Please Jason or experts from netdev comment on this. > >> > >> Thanks, > >> > >> Zhu Yanjun > >> > >> > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> + if (err && err != -EADDRINUSE) > >>>> + goto err_out; > >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >>>> + err = dev_mc_add(rxe->ndev, ll_addr); > >>>> + if (err) > >>>> + goto err_drop; > >>>> + > >>>> + return 0; > >>>> - return dev_mc_add(rxe->ndev, ll_addr); > >>>> +err_drop: > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> +err_out: > >>>> + return err; > >>>> } > >>>> -/** > >>>> - * rxe_mcast_del - delete multicast address from rxe device > >>>> - * @rxe: rxe device object > >>>> - * @mgid: multicast address as a gid > >>>> - * > >>>> - * Returns 0 on success else an error > >>>> - */ > >>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> +static int rxe_mcast_add(struct rxe_mcg *mcg) > >>>> { > >>>> + struct rxe_dev *rxe = mcg->rxe; > >>>> + union ib_gid *mgid = &mcg->mgid; > >>>> unsigned char ll_addr[ETH_ALEN]; > >>>> + struct ip_mreqn imr = {}; > >>>> + int err; > >>>> + > >>>> + if (mcg->is_ipv6) > >>>> + return rxe_mcast_add6(rxe, mgid); > >>>> + > >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >>>> + imr.imr_ifindex = rxe->ndev->ifindex; > >>>> + rtnl_lock(); > >>>> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> + if (err && err != -EADDRINUSE) > >>>> + goto err_out; > >>>> + > >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >>>> + err = dev_mc_add(rxe->ndev, ll_addr); > >>>> + if (err) > >>>> + goto err_leave; > >>>> + > >>>> + return 0; > >>>> + > >>>> +err_leave: > >>>> + rtnl_lock(); > >>>> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> +err_out: > >>>> + return err; > >>>> +} > >>>> + > >>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> +{ > >>>> + struct sock *sk = recv_sockets.sk6->sk; > >>>> + unsigned char ll_addr[ETH_ALEN]; > >>>> + int err, err2; > >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >>>> + err = dev_mc_del(rxe->ndev, ll_addr); > >>>> + > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > >>>> + (struct in6_addr *)mgid); > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> + > >>>> + return err ?: err2; > >>>> +} > >>>> + > >>>> +static int rxe_mcast_del(struct rxe_mcg *mcg) > >>>> +{ > >>>> + struct rxe_dev *rxe = mcg->rxe; > >>>> + union ib_gid *mgid = &mcg->mgid; > >>>> + unsigned char ll_addr[ETH_ALEN]; > >>>> + struct ip_mreqn imr = {}; > >>>> + int err, err2; > >>>> + > >>>> + if (mcg->is_ipv6) > >>>> + return rxe_mcast_del6(rxe, mgid); > >>>> + > >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >>>> + imr.imr_ifindex = rxe->ndev->ifindex; > >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >>>> + err = dev_mc_del(rxe->ndev, ll_addr); > >>>> + > >>>> + rtnl_lock(); > >>>> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> - return dev_mc_del(rxe->ndev, ll_addr); > >>>> + return err ?: err2; > >>>> } > >>>> /** > >>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, > >>>> union ib_gid *mgid, > >>>> { > >>>> kref_init(&mcg->ref_cnt); > >>>> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > >>>> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > >>>> INIT_LIST_HEAD(&mcg->qp_list); > >>>> mcg->rxe = rxe; > >>>> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct > >>>> rxe_dev *rxe, union ib_gid *mgid) > >>>> spin_unlock_bh(&rxe->mcg_lock); > >>>> /* add mcast address outside of lock */ > >>>> - err = rxe_mcast_add(rxe, mgid); > >>>> + err = rxe_mcast_add(mcg); > >>>> if (!err) > >>>> return mcg; > >>>> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > >>>> static void rxe_destroy_mcg(struct rxe_mcg *mcg) > >>>> { > >>>> /* delete mcast address outside of lock */ > >>>> - rxe_mcast_del(mcg->rxe, &mcg->mgid); > >>>> + rxe_mcast_del(mcg); > >>>> spin_lock_bh(&mcg->rxe->mcg_lock); > >>>> __rxe_destroy_mcg(mcg); > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c > >>>> b/drivers/infiniband/sw/rxe/rxe_net.c > >>>> index 58c3f3759bf0..b481f8da2002 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c > >>>> @@ -18,7 +18,7 @@ > >>>> #include "rxe_net.h" > >>>> #include "rxe_loc.h" > >>>> -static struct rxe_recv_sockets recv_sockets; > >>>> +struct rxe_recv_sockets recv_sockets; > >>>> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > >>>> struct net_device *ndev, > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h > >>>> b/drivers/infiniband/sw/rxe/rxe_net.h > >>>> index 45d80d00f86b..89cee7d5340f 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h > >>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > >>>> struct socket *sk4; > >>>> struct socket *sk6; > >>>> }; > >>>> +extern struct rxe_recv_sockets recv_sockets; > >>>> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> index ccb9d19ffe8a..7be9e6232dd9 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> @@ -352,6 +352,7 @@ struct rxe_mcg { > >>>> atomic_t qp_num; > >>>> u32 qkey; > >>>> u16 pkey; > >>>> + bool is_ipv6; > >>>> }; > >>>> struct rxe_mca { > >> >
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 86cc2e18a7fd..54735d07cee5 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -19,38 +19,116 @@ * mcast packets in the rxe receive path. */ +#include <linux/igmp.h> + #include "rxe.h" -/** - * rxe_mcast_add - add multicast address to rxe device - * @rxe: rxe device object - * @mgid: multicast address as a gid - * - * Returns 0 on success else an error - */ -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) { + struct in6_addr *addr6 = (struct in6_addr *)mgid; + struct sock *sk = recv_sockets.sk6->sk; unsigned char ll_addr[ETH_ALEN]; + int err; + + spin_lock_bh(&sk->sk_lock.slock); + rtnl_lock(); + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); + rtnl_unlock(); + spin_unlock_bh(&sk->sk_lock.slock); + if (err && err != -EADDRINUSE) + goto err_out; ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_add(rxe->ndev, ll_addr); + if (err) + goto err_drop; + + return 0; - return dev_mc_add(rxe->ndev, ll_addr); +err_drop: + spin_lock_bh(&sk->sk_lock.slock); + rtnl_lock(); + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); + rtnl_unlock(); + spin_unlock_bh(&sk->sk_lock.slock); +err_out: + return err; } -/** - * rxe_mcast_del - delete multicast address from rxe device - * @rxe: rxe device object - * @mgid: multicast address as a gid - * - * Returns 0 on success else an error - */ -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) +static int rxe_mcast_add(struct rxe_mcg *mcg) { + struct rxe_dev *rxe = mcg->rxe; + union ib_gid *mgid = &mcg->mgid; unsigned char ll_addr[ETH_ALEN]; + struct ip_mreqn imr = {}; + int err; + + if (mcg->is_ipv6) + return rxe_mcast_add6(rxe, mgid); + + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); + imr.imr_ifindex = rxe->ndev->ifindex; + rtnl_lock(); + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); + rtnl_unlock(); + if (err && err != -EADDRINUSE) + goto err_out; + + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); + err = dev_mc_add(rxe->ndev, ll_addr); + if (err) + goto err_leave; + + return 0; + +err_leave: + rtnl_lock(); + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); + rtnl_unlock(); +err_out: + return err; +} + +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct sock *sk = recv_sockets.sk6->sk; + unsigned char ll_addr[ETH_ALEN]; + int err, err2; ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_del(rxe->ndev, ll_addr); + + spin_lock_bh(&sk->sk_lock.slock); + rtnl_lock(); + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, + (struct in6_addr *)mgid); + rtnl_unlock(); + spin_unlock_bh(&sk->sk_lock.slock); + + return err ?: err2; +} + +static int rxe_mcast_del(struct rxe_mcg *mcg) +{ + struct rxe_dev *rxe = mcg->rxe; + union ib_gid *mgid = &mcg->mgid; + unsigned char ll_addr[ETH_ALEN]; + struct ip_mreqn imr = {}; + int err, err2; + + if (mcg->is_ipv6) + return rxe_mcast_del6(rxe, mgid); + + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); + imr.imr_ifindex = rxe->ndev->ifindex; + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); + err = dev_mc_del(rxe->ndev, ll_addr); + + rtnl_lock(); + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); + rtnl_unlock(); - return dev_mc_del(rxe->ndev, ll_addr); + return err ?: err2; } /** @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, { kref_init(&mcg->ref_cnt); memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); INIT_LIST_HEAD(&mcg->qp_list); mcg->rxe = rxe; @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) spin_unlock_bh(&rxe->mcg_lock); /* add mcast address outside of lock */ - err = rxe_mcast_add(rxe, mgid); + err = rxe_mcast_add(mcg); if (!err) return mcg; @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) static void rxe_destroy_mcg(struct rxe_mcg *mcg) { /* delete mcast address outside of lock */ - rxe_mcast_del(mcg->rxe, &mcg->mgid); + rxe_mcast_del(mcg); spin_lock_bh(&mcg->rxe->mcg_lock); __rxe_destroy_mcg(mcg); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 58c3f3759bf0..b481f8da2002 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -18,7 +18,7 @@ #include "rxe_net.h" #include "rxe_loc.h" -static struct rxe_recv_sockets recv_sockets; +struct rxe_recv_sockets recv_sockets; static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, struct net_device *ndev, diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 45d80d00f86b..89cee7d5340f 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -15,6 +15,7 @@ struct rxe_recv_sockets { struct socket *sk4; struct socket *sk6; }; +extern struct rxe_recv_sockets recv_sockets; int rxe_net_add(const char *ibdev_name, struct net_device *ndev); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ccb9d19ffe8a..7be9e6232dd9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -352,6 +352,7 @@ struct rxe_mcg { atomic_t qp_num; u32 qkey; u16 pkey; + bool is_ipv6; }; struct rxe_mca {
Currently the rdma_rxe driver does not receive mcast packets at all. Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister the IP mcast address. This is required for mcast traffic to reach the rxe driver when coming from an external source. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- drivers/infiniband/sw/rxe/rxe_net.h | 1 + drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + 4 files changed, 102 insertions(+), 21 deletions(-)