diff mbox series

[for-next,v5,3/7] RDMA/rxe: Register IP mcast address

Message ID 20231205002613.10219-1-rpearsonhpe@gmail.com (mailing list archive)
State Superseded
Headers show
Series RDMA/rxe: Make multicast work | expand

Commit Message

Bob Pearson Dec. 5, 2023, 12:26 a.m. UTC
Currently the rdma_rxe driver does not receive mcast packets at all.

Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
the IP mcast address. This is required for mcast traffic to reach the
rxe driver when coming from an external source.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
 drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
 drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
 4 files changed, 102 insertions(+), 21 deletions(-)

Comments

Zhu Yanjun Dec. 5, 2023, 5:55 a.m. UTC | #1
Add  David S. Miller and  David Ahern.

They are the maintainers in netdev and very familiar with mcast.

Zhu Yanjun

在 2023/12/5 8:26, Bob Pearson 写道:
> Currently the rdma_rxe driver does not receive mcast packets at all.
>
> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
> the IP mcast address. This is required for mcast traffic to reach the
> rxe driver when coming from an external source.
>
> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>   drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
>   drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
>   drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
>   drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
>   4 files changed, 102 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 86cc2e18a7fd..54735d07cee5 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -19,38 +19,116 @@
>    * mcast packets in the rxe receive path.
>    */
>   
> +#include <linux/igmp.h>
> +
>   #include "rxe.h"
>   
> -/**
> - * rxe_mcast_add - add multicast address to rxe device
> - * @rxe: rxe device object
> - * @mgid: multicast address as a gid
> - *
> - * Returns 0 on success else an error
> - */
> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
>   {
> +	struct in6_addr *addr6 = (struct in6_addr *)mgid;
> +	struct sock *sk = recv_sockets.sk6->sk;
>   	unsigned char ll_addr[ETH_ALEN];
> +	int err;
> +
> +	spin_lock_bh(&sk->sk_lock.slock);
> +	rtnl_lock();
> +	err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
> +	rtnl_unlock();
> +	spin_unlock_bh(&sk->sk_lock.slock);
> +	if (err && err != -EADDRINUSE)
> +		goto err_out;
>   
>   	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> +	err = dev_mc_add(rxe->ndev, ll_addr);
> +	if (err)
> +		goto err_drop;
> +
> +	return 0;
>   
> -	return dev_mc_add(rxe->ndev, ll_addr);
> +err_drop:
> +	spin_lock_bh(&sk->sk_lock.slock);
> +	rtnl_lock();
> +	ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
> +	rtnl_unlock();
> +	spin_unlock_bh(&sk->sk_lock.slock);
> +err_out:
> +	return err;
>   }
>   
> -/**
> - * rxe_mcast_del - delete multicast address from rxe device
> - * @rxe: rxe device object
> - * @mgid: multicast address as a gid
> - *
> - * Returns 0 on success else an error
> - */
> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
> +static int rxe_mcast_add(struct rxe_mcg *mcg)
>   {
> +	struct rxe_dev *rxe = mcg->rxe;
> +	union ib_gid *mgid = &mcg->mgid;
>   	unsigned char ll_addr[ETH_ALEN];
> +	struct ip_mreqn imr = {};
> +	int err;
> +
> +	if (mcg->is_ipv6)
> +		return rxe_mcast_add6(rxe, mgid);
> +
> +	imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> +	imr.imr_ifindex = rxe->ndev->ifindex;
> +	rtnl_lock();
> +	err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
> +	rtnl_unlock();
> +	if (err && err != -EADDRINUSE)
> +		goto err_out;
> +
> +	ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> +	err = dev_mc_add(rxe->ndev, ll_addr);
> +	if (err)
> +		goto err_leave;
> +
> +	return 0;
> +
> +err_leave:
> +	rtnl_lock();
> +	ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> +	rtnl_unlock();
> +err_out:
> +	return err;
> +}
> +
> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
> +{
> +	struct sock *sk = recv_sockets.sk6->sk;
> +	unsigned char ll_addr[ETH_ALEN];
> +	int err, err2;
>   
>   	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> +	err = dev_mc_del(rxe->ndev, ll_addr);
> +
> +	spin_lock_bh(&sk->sk_lock.slock);
> +	rtnl_lock();
> +	err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
> +			(struct in6_addr *)mgid);
> +	rtnl_unlock();
> +	spin_unlock_bh(&sk->sk_lock.slock);
> +
> +	return err ?: err2;
> +}
> +
> +static int rxe_mcast_del(struct rxe_mcg *mcg)
> +{
> +	struct rxe_dev *rxe = mcg->rxe;
> +	union ib_gid *mgid = &mcg->mgid;
> +	unsigned char ll_addr[ETH_ALEN];
> +	struct ip_mreqn imr = {};
> +	int err, err2;
> +
> +	if (mcg->is_ipv6)
> +		return rxe_mcast_del6(rxe, mgid);
> +
> +	imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> +	imr.imr_ifindex = rxe->ndev->ifindex;
> +	ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> +	err = dev_mc_del(rxe->ndev, ll_addr);
> +
> +	rtnl_lock();
> +	err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> +	rtnl_unlock();
>   
> -	return dev_mc_del(rxe->ndev, ll_addr);
> +	return err ?: err2;
>   }
>   
>   /**
> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
>   {
>   	kref_init(&mcg->ref_cnt);
>   	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
> +	mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
>   	INIT_LIST_HEAD(&mcg->qp_list);
>   	mcg->rxe = rxe;
>   
> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   	spin_unlock_bh(&rxe->mcg_lock);
>   
>   	/* add mcast address outside of lock */
> -	err = rxe_mcast_add(rxe, mgid);
> +	err = rxe_mcast_add(mcg);
>   	if (!err)
>   		return mcg;
>   
> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>   static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>   {
>   	/* delete mcast address outside of lock */
> -	rxe_mcast_del(mcg->rxe, &mcg->mgid);
> +	rxe_mcast_del(mcg);
>   
>   	spin_lock_bh(&mcg->rxe->mcg_lock);
>   	__rxe_destroy_mcg(mcg);
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 58c3f3759bf0..b481f8da2002 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -18,7 +18,7 @@
>   #include "rxe_net.h"
>   #include "rxe_loc.h"
>   
> -static struct rxe_recv_sockets recv_sockets;
> +struct rxe_recv_sockets recv_sockets;
>   
>   static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>   					 struct net_device *ndev,
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
> index 45d80d00f86b..89cee7d5340f 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.h
> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
>   	struct socket *sk4;
>   	struct socket *sk6;
>   };
> +extern struct rxe_recv_sockets recv_sockets;
>   
>   int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>   
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index ccb9d19ffe8a..7be9e6232dd9 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -352,6 +352,7 @@ struct rxe_mcg {
>   	atomic_t		qp_num;
>   	u32			qkey;
>   	u16			pkey;
> +	bool			is_ipv6;
>   };
>   
>   struct rxe_mca {
Zhu Yanjun Dec. 5, 2023, 10:29 a.m. UTC | #2
在 2023/12/5 13:55, Zhu Yanjun 写道:
> Add David S. Miller and  David Ahern.
>
> They are the maintainers in netdev and very familiar with mcast.
>
> Zhu Yanjun
>
> 在 2023/12/5 8:26, Bob Pearson 写道:
>> Currently the rdma_rxe driver does not receive mcast packets at all.
>>
>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
>> the IP mcast address. This is required for mcast traffic to reach the
>> rxe driver when coming from an external source.
>>
>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
>>   drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
>>   drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
>>   drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
>>   4 files changed, 102 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c 
>> b/drivers/infiniband/sw/rxe/rxe_mcast.c
>> index 86cc2e18a7fd..54735d07cee5 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>> @@ -19,38 +19,116 @@
>>    * mcast packets in the rxe receive path.
>>    */
>>   +#include <linux/igmp.h>
>> +
>>   #include "rxe.h"
>>   -/**
>> - * rxe_mcast_add - add multicast address to rxe device
>> - * @rxe: rxe device object
>> - * @mgid: multicast address as a gid
>> - *
>> - * Returns 0 on success else an error
>> - */
>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
>>   {
>> +    struct in6_addr *addr6 = (struct in6_addr *)mgid;
>> +    struct sock *sk = recv_sockets.sk6->sk;
>>       unsigned char ll_addr[ETH_ALEN];
>> +    int err;
>> +
>> +    spin_lock_bh(&sk->sk_lock.slock);
>> +    rtnl_lock();
>> +    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);


Normally sk_lock is used. Not sure if spin_lock_bh is correct or not.

Please Jason or experts from netdev comment on this.

Thanks,

Zhu Yanjun


>> +    rtnl_unlock();
>> +    spin_unlock_bh(&sk->sk_lock.slock);
>> +    if (err && err != -EADDRINUSE)
>> +        goto err_out;
>>         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>> +    if (err)
>> +        goto err_drop;
>> +
>> +    return 0;
>>   -    return dev_mc_add(rxe->ndev, ll_addr);
>> +err_drop:
>> +    spin_lock_bh(&sk->sk_lock.slock);
>> +    rtnl_lock();
>> +    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
>> +    rtnl_unlock();
>> +    spin_unlock_bh(&sk->sk_lock.slock);
>> +err_out:
>> +    return err;
>>   }
>>   -/**
>> - * rxe_mcast_del - delete multicast address from rxe device
>> - * @rxe: rxe device object
>> - * @mgid: multicast address as a gid
>> - *
>> - * Returns 0 on success else an error
>> - */
>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
>> +static int rxe_mcast_add(struct rxe_mcg *mcg)
>>   {
>> +    struct rxe_dev *rxe = mcg->rxe;
>> +    union ib_gid *mgid = &mcg->mgid;
>>       unsigned char ll_addr[ETH_ALEN];
>> +    struct ip_mreqn imr = {};
>> +    int err;
>> +
>> +    if (mcg->is_ipv6)
>> +        return rxe_mcast_add6(rxe, mgid);
>> +
>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>> +    rtnl_lock();
>> +    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
>> +    rtnl_unlock();
>> +    if (err && err != -EADDRINUSE)
>> +        goto err_out;
>> +
>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>> +    if (err)
>> +        goto err_leave;
>> +
>> +    return 0;
>> +
>> +err_leave:
>> +    rtnl_lock();
>> +    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>> +    rtnl_unlock();
>> +err_out:
>> +    return err;
>> +}
>> +
>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
>> +{
>> +    struct sock *sk = recv_sockets.sk6->sk;
>> +    unsigned char ll_addr[ETH_ALEN];
>> +    int err, err2;
>>         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>> +
>> +    spin_lock_bh(&sk->sk_lock.slock);
>> +    rtnl_lock();
>> +    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
>> +            (struct in6_addr *)mgid);
>> +    rtnl_unlock();
>> +    spin_unlock_bh(&sk->sk_lock.slock);
>> +
>> +    return err ?: err2;
>> +}
>> +
>> +static int rxe_mcast_del(struct rxe_mcg *mcg)
>> +{
>> +    struct rxe_dev *rxe = mcg->rxe;
>> +    union ib_gid *mgid = &mcg->mgid;
>> +    unsigned char ll_addr[ETH_ALEN];
>> +    struct ip_mreqn imr = {};
>> +    int err, err2;
>> +
>> +    if (mcg->is_ipv6)
>> +        return rxe_mcast_del6(rxe, mgid);
>> +
>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>> +
>> +    rtnl_lock();
>> +    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>> +    rtnl_unlock();
>>   -    return dev_mc_del(rxe->ndev, ll_addr);
>> +    return err ?: err2;
>>   }
>>     /**
>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, 
>> union ib_gid *mgid,
>>   {
>>       kref_init(&mcg->ref_cnt);
>>       memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
>> +    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
>>       INIT_LIST_HEAD(&mcg->qp_list);
>>       mcg->rxe = rxe;
>>   @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct 
>> rxe_dev *rxe, union ib_gid *mgid)
>>       spin_unlock_bh(&rxe->mcg_lock);
>>         /* add mcast address outside of lock */
>> -    err = rxe_mcast_add(rxe, mgid);
>> +    err = rxe_mcast_add(mcg);
>>       if (!err)
>>           return mcg;
>>   @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>>   static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>>   {
>>       /* delete mcast address outside of lock */
>> -    rxe_mcast_del(mcg->rxe, &mcg->mgid);
>> +    rxe_mcast_del(mcg);
>>         spin_lock_bh(&mcg->rxe->mcg_lock);
>>       __rxe_destroy_mcg(mcg);
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c 
>> b/drivers/infiniband/sw/rxe/rxe_net.c
>> index 58c3f3759bf0..b481f8da2002 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>> @@ -18,7 +18,7 @@
>>   #include "rxe_net.h"
>>   #include "rxe_loc.h"
>>   -static struct rxe_recv_sockets recv_sockets;
>> +struct rxe_recv_sockets recv_sockets;
>>     static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>>                        struct net_device *ndev,
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h 
>> b/drivers/infiniband/sw/rxe/rxe_net.h
>> index 45d80d00f86b..89cee7d5340f 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
>>       struct socket *sk4;
>>       struct socket *sk6;
>>   };
>> +extern struct rxe_recv_sockets recv_sockets;
>>     int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>>   diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h 
>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> index ccb9d19ffe8a..7be9e6232dd9 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> @@ -352,6 +352,7 @@ struct rxe_mcg {
>>       atomic_t        qp_num;
>>       u32            qkey;
>>       u16            pkey;
>> +    bool            is_ipv6;
>>   };
>>     struct rxe_mca {
Rain River Dec. 7, 2023, 1:47 a.m. UTC | #3
On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote:
>
>
> 在 2023/12/5 13:55, Zhu Yanjun 写道:
> > Add David S. Miller and  David Ahern.
> >
> > They are the maintainers in netdev and very familiar with mcast.
> >
> > Zhu Yanjun
> >
> > 在 2023/12/5 8:26, Bob Pearson 写道:
> >> Currently the rdma_rxe driver does not receive mcast packets at all.
> >>
> >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
> >> the IP mcast address. This is required for mcast traffic to reach the
> >> rxe driver when coming from an external source.
> >>
> >> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >> ---
> >>   drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
> >>   drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
> >>   drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
> >>   drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
> >>   4 files changed, 102 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> index 86cc2e18a7fd..54735d07cee5 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> @@ -19,38 +19,116 @@
> >>    * mcast packets in the rxe receive path.
> >>    */
> >>   +#include <linux/igmp.h>
> >> +
> >>   #include "rxe.h"
> >>   -/**
> >> - * rxe_mcast_add - add multicast address to rxe device
> >> - * @rxe: rxe device object
> >> - * @mgid: multicast address as a gid
> >> - *
> >> - * Returns 0 on success else an error
> >> - */
> >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
> >>   {
> >> +    struct in6_addr *addr6 = (struct in6_addr *)mgid;
> >> +    struct sock *sk = recv_sockets.sk6->sk;
> >>       unsigned char ll_addr[ETH_ALEN];
> >> +    int err;
> >> +
> >> +    spin_lock_bh(&sk->sk_lock.slock);
> >> +    rtnl_lock();
> >> +    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
>
>
> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not.

./net/ipv6/addrconf.c-2915-     lock_sock(sk);
./net/ipv6/addrconf.c-2916-     if (join)
./net/ipv6/addrconf.c:2917:             ret = ipv6_sock_mc_join(sk,
ifindex, addr);
./net/ipv6/addrconf.c-2918-     else
./net/ipv6/addrconf.c-2919-             ret = ipv6_sock_mc_drop(sk,
ifindex, addr);
./net/ipv6/addrconf.c-2920-     release_sock(sk);

Should be lock_sock?

>
> Please Jason or experts from netdev comment on this.
>
> Thanks,
>
> Zhu Yanjun
>
>
> >> +    rtnl_unlock();
> >> +    spin_unlock_bh(&sk->sk_lock.slock);
> >> +    if (err && err != -EADDRINUSE)
> >> +        goto err_out;
> >>         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >> +    err = dev_mc_add(rxe->ndev, ll_addr);
> >> +    if (err)
> >> +        goto err_drop;
> >> +
> >> +    return 0;
> >>   -    return dev_mc_add(rxe->ndev, ll_addr);
> >> +err_drop:
> >> +    spin_lock_bh(&sk->sk_lock.slock);
> >> +    rtnl_lock();
> >> +    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
> >> +    rtnl_unlock();
> >> +    spin_unlock_bh(&sk->sk_lock.slock);
> >> +err_out:
> >> +    return err;
> >>   }
> >>   -/**
> >> - * rxe_mcast_del - delete multicast address from rxe device
> >> - * @rxe: rxe device object
> >> - * @mgid: multicast address as a gid
> >> - *
> >> - * Returns 0 on success else an error
> >> - */
> >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +static int rxe_mcast_add(struct rxe_mcg *mcg)
> >>   {
> >> +    struct rxe_dev *rxe = mcg->rxe;
> >> +    union ib_gid *mgid = &mcg->mgid;
> >>       unsigned char ll_addr[ETH_ALEN];
> >> +    struct ip_mreqn imr = {};
> >> +    int err;
> >> +
> >> +    if (mcg->is_ipv6)
> >> +        return rxe_mcast_add6(rxe, mgid);
> >> +
> >> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >> +    imr.imr_ifindex = rxe->ndev->ifindex;
> >> +    rtnl_lock();
> >> +    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
> >> +    rtnl_unlock();
> >> +    if (err && err != -EADDRINUSE)
> >> +        goto err_out;
> >> +
> >> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >> +    err = dev_mc_add(rxe->ndev, ll_addr);
> >> +    if (err)
> >> +        goto err_leave;
> >> +
> >> +    return 0;
> >> +
> >> +err_leave:
> >> +    rtnl_lock();
> >> +    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >> +    rtnl_unlock();
> >> +err_out:
> >> +    return err;
> >> +}
> >> +
> >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +{
> >> +    struct sock *sk = recv_sockets.sk6->sk;
> >> +    unsigned char ll_addr[ETH_ALEN];
> >> +    int err, err2;
> >>         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >> +    err = dev_mc_del(rxe->ndev, ll_addr);
> >> +
> >> +    spin_lock_bh(&sk->sk_lock.slock);
> >> +    rtnl_lock();
> >> +    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
> >> +            (struct in6_addr *)mgid);
> >> +    rtnl_unlock();
> >> +    spin_unlock_bh(&sk->sk_lock.slock);
> >> +
> >> +    return err ?: err2;
> >> +}
> >> +
> >> +static int rxe_mcast_del(struct rxe_mcg *mcg)
> >> +{
> >> +    struct rxe_dev *rxe = mcg->rxe;
> >> +    union ib_gid *mgid = &mcg->mgid;
> >> +    unsigned char ll_addr[ETH_ALEN];
> >> +    struct ip_mreqn imr = {};
> >> +    int err, err2;
> >> +
> >> +    if (mcg->is_ipv6)
> >> +        return rxe_mcast_del6(rxe, mgid);
> >> +
> >> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >> +    imr.imr_ifindex = rxe->ndev->ifindex;
> >> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >> +    err = dev_mc_del(rxe->ndev, ll_addr);
> >> +
> >> +    rtnl_lock();
> >> +    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >> +    rtnl_unlock();
> >>   -    return dev_mc_del(rxe->ndev, ll_addr);
> >> +    return err ?: err2;
> >>   }
> >>     /**
> >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
> >> union ib_gid *mgid,
> >>   {
> >>       kref_init(&mcg->ref_cnt);
> >>       memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
> >> +    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
> >>       INIT_LIST_HEAD(&mcg->qp_list);
> >>       mcg->rxe = rxe;
> >>   @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
> >> rxe_dev *rxe, union ib_gid *mgid)
> >>       spin_unlock_bh(&rxe->mcg_lock);
> >>         /* add mcast address outside of lock */
> >> -    err = rxe_mcast_add(rxe, mgid);
> >> +    err = rxe_mcast_add(mcg);
> >>       if (!err)
> >>           return mcg;
> >>   @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>   static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>   {
> >>       /* delete mcast address outside of lock */
> >> -    rxe_mcast_del(mcg->rxe, &mcg->mgid);
> >> +    rxe_mcast_del(mcg);
> >>         spin_lock_bh(&mcg->rxe->mcg_lock);
> >>       __rxe_destroy_mcg(mcg);
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
> >> b/drivers/infiniband/sw/rxe/rxe_net.c
> >> index 58c3f3759bf0..b481f8da2002 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> >> @@ -18,7 +18,7 @@
> >>   #include "rxe_net.h"
> >>   #include "rxe_loc.h"
> >>   -static struct rxe_recv_sockets recv_sockets;
> >> +struct rxe_recv_sockets recv_sockets;
> >>     static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
> >>                        struct net_device *ndev,
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
> >> b/drivers/infiniband/sw/rxe/rxe_net.h
> >> index 45d80d00f86b..89cee7d5340f 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_net.h
> >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
> >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
> >>       struct socket *sk4;
> >>       struct socket *sk6;
> >>   };
> >> +extern struct rxe_recv_sockets recv_sockets;
> >>     int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
> >>   diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> index ccb9d19ffe8a..7be9e6232dd9 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> @@ -352,6 +352,7 @@ struct rxe_mcg {
> >>       atomic_t        qp_num;
> >>       u32            qkey;
> >>       u16            pkey;
> >> +    bool            is_ipv6;
> >>   };
> >>     struct rxe_mca {
>
Bob Pearson Dec. 7, 2023, 7:07 p.m. UTC | #4
On 12/6/23 19:47, Rain River wrote:
> On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote:
>>
>>
>> 在 2023/12/5 13:55, Zhu Yanjun 写道:
>>> Add David S. Miller and  David Ahern.
>>>
>>> They are the maintainers in netdev and very familiar with mcast.
>>>
>>> Zhu Yanjun
>>>
>>> 在 2023/12/5 8:26, Bob Pearson 写道:
>>>> Currently the rdma_rxe driver does not receive mcast packets at all.
>>>>
>>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
>>>> the IP mcast address. This is required for mcast traffic to reach the
>>>> rxe driver when coming from an external source.
>>>>
>>>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>>>> ---
>>>>    drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
>>>>    drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
>>>>    drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
>>>>    drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
>>>>    4 files changed, 102 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> index 86cc2e18a7fd..54735d07cee5 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> @@ -19,38 +19,116 @@
>>>>     * mcast packets in the rxe receive path.
>>>>     */
>>>>    +#include <linux/igmp.h>
>>>> +
>>>>    #include "rxe.h"
>>>>    -/**
>>>> - * rxe_mcast_add - add multicast address to rxe device
>>>> - * @rxe: rxe device object
>>>> - * @mgid: multicast address as a gid
>>>> - *
>>>> - * Returns 0 on success else an error
>>>> - */
>>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>    {
>>>> +    struct in6_addr *addr6 = (struct in6_addr *)mgid;
>>>> +    struct sock *sk = recv_sockets.sk6->sk;
>>>>        unsigned char ll_addr[ETH_ALEN];
>>>> +    int err;
>>>> +
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
>>
>>
>> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not.
> 
> ./net/ipv6/addrconf.c-2915-     lock_sock(sk);
> ./net/ipv6/addrconf.c-2916-     if (join)
> ./net/ipv6/addrconf.c:2917:             ret = ipv6_sock_mc_join(sk,
> ifindex, addr);
> ./net/ipv6/addrconf.c-2918-     else
> ./net/ipv6/addrconf.c-2919-             ret = ipv6_sock_mc_drop(sk,
> ifindex, addr);
> ./net/ipv6/addrconf.c-2920-     release_sock(sk);
> 
> Should be lock_sock?

It works as well as spin_lock_bh() in preventing the RCU splat and
looks like the preferred way. I'll make this change.

Bob
> 
>>
>> Please Jason or experts from netdev comment on this.
>>
>> Thanks,
>>
>> Zhu Yanjun
>>
>>
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +    if (err && err != -EADDRINUSE)
>>>> +        goto err_out;
>>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>>>> +    if (err)
>>>> +        goto err_drop;
>>>> +
>>>> +    return 0;
>>>>    -    return dev_mc_add(rxe->ndev, ll_addr);
>>>> +err_drop:
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +err_out:
>>>> +    return err;
>>>>    }
>>>>    -/**
>>>> - * rxe_mcast_del - delete multicast address from rxe device
>>>> - * @rxe: rxe device object
>>>> - * @mgid: multicast address as a gid
>>>> - *
>>>> - * Returns 0 on success else an error
>>>> - */
>>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +static int rxe_mcast_add(struct rxe_mcg *mcg)
>>>>    {
>>>> +    struct rxe_dev *rxe = mcg->rxe;
>>>> +    union ib_gid *mgid = &mcg->mgid;
>>>>        unsigned char ll_addr[ETH_ALEN];
>>>> +    struct ip_mreqn imr = {};
>>>> +    int err;
>>>> +
>>>> +    if (mcg->is_ipv6)
>>>> +        return rxe_mcast_add6(rxe, mgid);
>>>> +
>>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>>>> +    rtnl_lock();
>>>> +    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>> +    if (err && err != -EADDRINUSE)
>>>> +        goto err_out;
>>>> +
>>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>>>> +    if (err)
>>>> +        goto err_leave;
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_leave:
>>>> +    rtnl_lock();
>>>> +    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>> +err_out:
>>>> +    return err;
>>>> +}
>>>> +
>>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +{
>>>> +    struct sock *sk = recv_sockets.sk6->sk;
>>>> +    unsigned char ll_addr[ETH_ALEN];
>>>> +    int err, err2;
>>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>>>> +
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
>>>> +            (struct in6_addr *)mgid);
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +
>>>> +    return err ?: err2;
>>>> +}
>>>> +
>>>> +static int rxe_mcast_del(struct rxe_mcg *mcg)
>>>> +{
>>>> +    struct rxe_dev *rxe = mcg->rxe;
>>>> +    union ib_gid *mgid = &mcg->mgid;
>>>> +    unsigned char ll_addr[ETH_ALEN];
>>>> +    struct ip_mreqn imr = {};
>>>> +    int err, err2;
>>>> +
>>>> +    if (mcg->is_ipv6)
>>>> +        return rxe_mcast_del6(rxe, mgid);
>>>> +
>>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>>>> +
>>>> +    rtnl_lock();
>>>> +    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>>    -    return dev_mc_del(rxe->ndev, ll_addr);
>>>> +    return err ?: err2;
>>>>    }
>>>>      /**
>>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
>>>> union ib_gid *mgid,
>>>>    {
>>>>        kref_init(&mcg->ref_cnt);
>>>>        memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
>>>> +    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
>>>>        INIT_LIST_HEAD(&mcg->qp_list);
>>>>        mcg->rxe = rxe;
>>>>    @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
>>>> rxe_dev *rxe, union ib_gid *mgid)
>>>>        spin_unlock_bh(&rxe->mcg_lock);
>>>>          /* add mcast address outside of lock */
>>>> -    err = rxe_mcast_add(rxe, mgid);
>>>> +    err = rxe_mcast_add(mcg);
>>>>        if (!err)
>>>>            return mcg;
>>>>    @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>    static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>    {
>>>>        /* delete mcast address outside of lock */
>>>> -    rxe_mcast_del(mcg->rxe, &mcg->mgid);
>>>> +    rxe_mcast_del(mcg);
>>>>          spin_lock_bh(&mcg->rxe->mcg_lock);
>>>>        __rxe_destroy_mcg(mcg);
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> index 58c3f3759bf0..b481f8da2002 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> @@ -18,7 +18,7 @@
>>>>    #include "rxe_net.h"
>>>>    #include "rxe_loc.h"
>>>>    -static struct rxe_recv_sockets recv_sockets;
>>>> +struct rxe_recv_sockets recv_sockets;
>>>>      static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>>>>                         struct net_device *ndev,
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
>>>> b/drivers/infiniband/sw/rxe/rxe_net.h
>>>> index 45d80d00f86b..89cee7d5340f 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
>>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
>>>>        struct socket *sk4;
>>>>        struct socket *sk6;
>>>>    };
>>>> +extern struct rxe_recv_sockets recv_sockets;
>>>>      int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>>>>    diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> index ccb9d19ffe8a..7be9e6232dd9 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> @@ -352,6 +352,7 @@ struct rxe_mcg {
>>>>        atomic_t        qp_num;
>>>>        u32            qkey;
>>>>        u16            pkey;
>>>> +    bool            is_ipv6;
>>>>    };
>>>>      struct rxe_mca {
>>
Greg Sword Dec. 8, 2023, 1:24 a.m. UTC | #5
On Fri, Dec 8, 2023 at 3:07 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
>
>
> On 12/6/23 19:47, Rain River wrote:
> > On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote:
> >>
> >>
> >> 在 2023/12/5 13:55, Zhu Yanjun 写道:
> >>> Add David S. Miller and  David Ahern.
> >>>
> >>> They are the maintainers in netdev and very familiar with mcast.
> >>>
> >>> Zhu Yanjun
> >>>
> >>> 在 2023/12/5 8:26, Bob Pearson 写道:
> >>>> Currently the rdma_rxe driver does not receive mcast packets at all.
> >>>>
> >>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
> >>>> the IP mcast address. This is required for mcast traffic to reach the
> >>>> rxe driver when coming from an external source.
> >>>>
> >>>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >>>> ---
> >>>>    drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
> >>>>    drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
> >>>>    drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
> >>>>    drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
> >>>>    4 files changed, 102 insertions(+), 21 deletions(-)
> >>>>
> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> index 86cc2e18a7fd..54735d07cee5 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> @@ -19,38 +19,116 @@
> >>>>     * mcast packets in the rxe receive path.
> >>>>     */
> >>>>    +#include <linux/igmp.h>
> >>>> +
> >>>>    #include "rxe.h"
> >>>>    -/**
> >>>> - * rxe_mcast_add - add multicast address to rxe device
> >>>> - * @rxe: rxe device object
> >>>> - * @mgid: multicast address as a gid
> >>>> - *
> >>>> - * Returns 0 on success else an error
> >>>> - */
> >>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>    {
> >>>> +    struct in6_addr *addr6 = (struct in6_addr *)mgid;
> >>>> +    struct sock *sk = recv_sockets.sk6->sk;
> >>>>        unsigned char ll_addr[ETH_ALEN];
> >>>> +    int err;
> >>>> +
> >>>> +    spin_lock_bh(&sk->sk_lock.slock);
> >>>> +    rtnl_lock();
> >>>> +    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
> >>
> >>
> >> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not.
> >
> > ./net/ipv6/addrconf.c-2915-     lock_sock(sk);
> > ./net/ipv6/addrconf.c-2916-     if (join)
> > ./net/ipv6/addrconf.c:2917:             ret = ipv6_sock_mc_join(sk,
> > ifindex, addr);
> > ./net/ipv6/addrconf.c-2918-     else
> > ./net/ipv6/addrconf.c-2919-             ret = ipv6_sock_mc_drop(sk,
> > ifindex, addr);
> > ./net/ipv6/addrconf.c-2920-     release_sock(sk);
> >
> > Should be lock_sock?
>
> It works as well as spin_lock_bh() in preventing the RCU splat and
> looks like the preferred way. I'll make this change.

This is the implementation of lock_sock. lock_sock has not only spin_lock_bh,
but also other protections for sk.
You should use lock_sock, instead of spin_lock_bh.

void lock_sock_nested(struct sock *sk, int subclass)
{
        /* The sk_lock has mutex_lock() semantics here. */
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);

        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
        if (sock_owned_by_user_nocheck(sk))
                __lock_sock(sk);
        sk->sk_lock.owned = 1;
        spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);

>
> Bob
> >
> >>
> >> Please Jason or experts from netdev comment on this.
> >>
> >> Thanks,
> >>
> >> Zhu Yanjun
> >>
> >>
> >>>> +    rtnl_unlock();
> >>>> +    spin_unlock_bh(&sk->sk_lock.slock);
> >>>> +    if (err && err != -EADDRINUSE)
> >>>> +        goto err_out;
> >>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
> >>>> +    if (err)
> >>>> +        goto err_drop;
> >>>> +
> >>>> +    return 0;
> >>>>    -    return dev_mc_add(rxe->ndev, ll_addr);
> >>>> +err_drop:
> >>>> +    spin_lock_bh(&sk->sk_lock.slock);
> >>>> +    rtnl_lock();
> >>>> +    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
> >>>> +    rtnl_unlock();
> >>>> +    spin_unlock_bh(&sk->sk_lock.slock);
> >>>> +err_out:
> >>>> +    return err;
> >>>>    }
> >>>>    -/**
> >>>> - * rxe_mcast_del - delete multicast address from rxe device
> >>>> - * @rxe: rxe device object
> >>>> - * @mgid: multicast address as a gid
> >>>> - *
> >>>> - * Returns 0 on success else an error
> >>>> - */
> >>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>> +static int rxe_mcast_add(struct rxe_mcg *mcg)
> >>>>    {
> >>>> +    struct rxe_dev *rxe = mcg->rxe;
> >>>> +    union ib_gid *mgid = &mcg->mgid;
> >>>>        unsigned char ll_addr[ETH_ALEN];
> >>>> +    struct ip_mreqn imr = {};
> >>>> +    int err;
> >>>> +
> >>>> +    if (mcg->is_ipv6)
> >>>> +        return rxe_mcast_add6(rxe, mgid);
> >>>> +
> >>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
> >>>> +    rtnl_lock();
> >>>> +    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
> >>>> +    rtnl_unlock();
> >>>> +    if (err && err != -EADDRINUSE)
> >>>> +        goto err_out;
> >>>> +
> >>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
> >>>> +    if (err)
> >>>> +        goto err_leave;
> >>>> +
> >>>> +    return 0;
> >>>> +
> >>>> +err_leave:
> >>>> +    rtnl_lock();
> >>>> +    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >>>> +    rtnl_unlock();
> >>>> +err_out:
> >>>> +    return err;
> >>>> +}
> >>>> +
> >>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>> +{
> >>>> +    struct sock *sk = recv_sockets.sk6->sk;
> >>>> +    unsigned char ll_addr[ETH_ALEN];
> >>>> +    int err, err2;
> >>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
> >>>> +
> >>>> +    spin_lock_bh(&sk->sk_lock.slock);
> >>>> +    rtnl_lock();
> >>>> +    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
> >>>> +            (struct in6_addr *)mgid);
> >>>> +    rtnl_unlock();
> >>>> +    spin_unlock_bh(&sk->sk_lock.slock);
> >>>> +
> >>>> +    return err ?: err2;
> >>>> +}
> >>>> +
> >>>> +static int rxe_mcast_del(struct rxe_mcg *mcg)
> >>>> +{
> >>>> +    struct rxe_dev *rxe = mcg->rxe;
> >>>> +    union ib_gid *mgid = &mcg->mgid;
> >>>> +    unsigned char ll_addr[ETH_ALEN];
> >>>> +    struct ip_mreqn imr = {};
> >>>> +    int err, err2;
> >>>> +
> >>>> +    if (mcg->is_ipv6)
> >>>> +        return rxe_mcast_del6(rxe, mgid);
> >>>> +
> >>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
> >>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
> >>>> +
> >>>> +    rtnl_lock();
> >>>> +    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >>>> +    rtnl_unlock();
> >>>>    -    return dev_mc_del(rxe->ndev, ll_addr);
> >>>> +    return err ?: err2;
> >>>>    }
> >>>>      /**
> >>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
> >>>> union ib_gid *mgid,
> >>>>    {
> >>>>        kref_init(&mcg->ref_cnt);
> >>>>        memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
> >>>> +    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
> >>>>        INIT_LIST_HEAD(&mcg->qp_list);
> >>>>        mcg->rxe = rxe;
> >>>>    @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
> >>>> rxe_dev *rxe, union ib_gid *mgid)
> >>>>        spin_unlock_bh(&rxe->mcg_lock);
> >>>>          /* add mcast address outside of lock */
> >>>> -    err = rxe_mcast_add(rxe, mgid);
> >>>> +    err = rxe_mcast_add(mcg);
> >>>>        if (!err)
> >>>>            return mcg;
> >>>>    @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>    static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>    {
> >>>>        /* delete mcast address outside of lock */
> >>>> -    rxe_mcast_del(mcg->rxe, &mcg->mgid);
> >>>> +    rxe_mcast_del(mcg);
> >>>>          spin_lock_bh(&mcg->rxe->mcg_lock);
> >>>>        __rxe_destroy_mcg(mcg);
> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
> >>>> b/drivers/infiniband/sw/rxe/rxe_net.c
> >>>> index 58c3f3759bf0..b481f8da2002 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> >>>> @@ -18,7 +18,7 @@
> >>>>    #include "rxe_net.h"
> >>>>    #include "rxe_loc.h"
> >>>>    -static struct rxe_recv_sockets recv_sockets;
> >>>> +struct rxe_recv_sockets recv_sockets;
> >>>>      static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
> >>>>                         struct net_device *ndev,
> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
> >>>> b/drivers/infiniband/sw/rxe/rxe_net.h
> >>>> index 45d80d00f86b..89cee7d5340f 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
> >>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
> >>>>        struct socket *sk4;
> >>>>        struct socket *sk6;
> >>>>    };
> >>>> +extern struct rxe_recv_sockets recv_sockets;
> >>>>      int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
> >>>>    diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >>>> index ccb9d19ffe8a..7be9e6232dd9 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >>>> @@ -352,6 +352,7 @@ struct rxe_mcg {
> >>>>        atomic_t        qp_num;
> >>>>        u32            qkey;
> >>>>        u16            pkey;
> >>>> +    bool            is_ipv6;
> >>>>    };
> >>>>      struct rxe_mca {
> >>
>
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 86cc2e18a7fd..54735d07cee5 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -19,38 +19,116 @@ 
  * mcast packets in the rxe receive path.
  */
 
+#include <linux/igmp.h>
+
 #include "rxe.h"
 
-/**
- * rxe_mcast_add - add multicast address to rxe device
- * @rxe: rxe device object
- * @mgid: multicast address as a gid
- *
- * Returns 0 on success else an error
- */
-static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
 {
+	struct in6_addr *addr6 = (struct in6_addr *)mgid;
+	struct sock *sk = recv_sockets.sk6->sk;
 	unsigned char ll_addr[ETH_ALEN];
+	int err;
+
+	spin_lock_bh(&sk->sk_lock.slock);
+	rtnl_lock();
+	err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
+	rtnl_unlock();
+	spin_unlock_bh(&sk->sk_lock.slock);
+	if (err && err != -EADDRINUSE)
+		goto err_out;
 
 	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+	err = dev_mc_add(rxe->ndev, ll_addr);
+	if (err)
+		goto err_drop;
+
+	return 0;
 
-	return dev_mc_add(rxe->ndev, ll_addr);
+err_drop:
+	spin_lock_bh(&sk->sk_lock.slock);
+	rtnl_lock();
+	ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
+	rtnl_unlock();
+	spin_unlock_bh(&sk->sk_lock.slock);
+err_out:
+	return err;
 }
 
-/**
- * rxe_mcast_del - delete multicast address from rxe device
- * @rxe: rxe device object
- * @mgid: multicast address as a gid
- *
- * Returns 0 on success else an error
- */
-static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_add(struct rxe_mcg *mcg)
 {
+	struct rxe_dev *rxe = mcg->rxe;
+	union ib_gid *mgid = &mcg->mgid;
 	unsigned char ll_addr[ETH_ALEN];
+	struct ip_mreqn imr = {};
+	int err;
+
+	if (mcg->is_ipv6)
+		return rxe_mcast_add6(rxe, mgid);
+
+	imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
+	imr.imr_ifindex = rxe->ndev->ifindex;
+	rtnl_lock();
+	err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
+	rtnl_unlock();
+	if (err && err != -EADDRINUSE)
+		goto err_out;
+
+	ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
+	err = dev_mc_add(rxe->ndev, ll_addr);
+	if (err)
+		goto err_leave;
+
+	return 0;
+
+err_leave:
+	rtnl_lock();
+	ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
+	rtnl_unlock();
+err_out:
+	return err;
+}
+
+static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+	struct sock *sk = recv_sockets.sk6->sk;
+	unsigned char ll_addr[ETH_ALEN];
+	int err, err2;
 
 	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+	err = dev_mc_del(rxe->ndev, ll_addr);
+
+	spin_lock_bh(&sk->sk_lock.slock);
+	rtnl_lock();
+	err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
+			(struct in6_addr *)mgid);
+	rtnl_unlock();
+	spin_unlock_bh(&sk->sk_lock.slock);
+
+	return err ?: err2;
+}
+
+static int rxe_mcast_del(struct rxe_mcg *mcg)
+{
+	struct rxe_dev *rxe = mcg->rxe;
+	union ib_gid *mgid = &mcg->mgid;
+	unsigned char ll_addr[ETH_ALEN];
+	struct ip_mreqn imr = {};
+	int err, err2;
+
+	if (mcg->is_ipv6)
+		return rxe_mcast_del6(rxe, mgid);
+
+	imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
+	imr.imr_ifindex = rxe->ndev->ifindex;
+	ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
+	err = dev_mc_del(rxe->ndev, ll_addr);
+
+	rtnl_lock();
+	err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
+	rtnl_unlock();
 
-	return dev_mc_del(rxe->ndev, ll_addr);
+	return err ?: err2;
 }
 
 /**
@@ -164,6 +242,7 @@  static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 {
 	kref_init(&mcg->ref_cnt);
 	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
+	mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
 	INIT_LIST_HEAD(&mcg->qp_list);
 	mcg->rxe = rxe;
 
@@ -225,7 +304,7 @@  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 	spin_unlock_bh(&rxe->mcg_lock);
 
 	/* add mcast address outside of lock */
-	err = rxe_mcast_add(rxe, mgid);
+	err = rxe_mcast_add(mcg);
 	if (!err)
 		return mcg;
 
@@ -273,7 +352,7 @@  static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
 	/* delete mcast address outside of lock */
-	rxe_mcast_del(mcg->rxe, &mcg->mgid);
+	rxe_mcast_del(mcg);
 
 	spin_lock_bh(&mcg->rxe->mcg_lock);
 	__rxe_destroy_mcg(mcg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 58c3f3759bf0..b481f8da2002 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -18,7 +18,7 @@ 
 #include "rxe_net.h"
 #include "rxe_loc.h"
 
-static struct rxe_recv_sockets recv_sockets;
+struct rxe_recv_sockets recv_sockets;
 
 static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
 					 struct net_device *ndev,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 45d80d00f86b..89cee7d5340f 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -15,6 +15,7 @@  struct rxe_recv_sockets {
 	struct socket *sk4;
 	struct socket *sk6;
 };
+extern struct rxe_recv_sockets recv_sockets;
 
 int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ccb9d19ffe8a..7be9e6232dd9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -352,6 +352,7 @@  struct rxe_mcg {
 	atomic_t		qp_num;
 	u32			qkey;
 	u16			pkey;
+	bool			is_ipv6;
 };
 
 struct rxe_mca {