diff mbox series

[RESEND,v2] RDMA/siw: Remove direct link to net_device

Message ID 20241212151848.564872-1-bmt@zurich.ibm.com (mailing list archive)
State Not Applicable
Headers show
Series [RESEND,v2] RDMA/siw: Remove direct link to net_device | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Bernard Metzler Dec. 12, 2024, 3:18 p.m. UTC
Do not manage a per device direct link to net_device. Rely
on associated ib_devices net_device management, not doubling
the effort locally. A badly managed local link to net_device
was causing a 'KASAN: slab-use-after-free' exception during
siw_query_port() call.

Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw.h       |  7 +++---
 drivers/infiniband/sw/siw/siw_cm.c    | 31 +++++++++++++++++++-----
 drivers/infiniband/sw/siw/siw_main.c  | 15 +-----------
 drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
 4 files changed, 53 insertions(+), 35 deletions(-)

Comments

Zhu Yanjun Dec. 14, 2024, 12:37 p.m. UTC | #1
在 2024/12/12 16:18, Bernard Metzler 写道:
> Do not manage a per device direct link to net_device. Rely
> on associated ib_devices net_device management, not doubling
> the effort locally. A badly managed local link to net_device
> was causing a 'KASAN: slab-use-after-free' exception during
> siw_query_port() call.
> 
> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>   drivers/infiniband/sw/siw/siw.h       |  7 +++---
>   drivers/infiniband/sw/siw/siw_cm.c    | 31 +++++++++++++++++++-----
>   drivers/infiniband/sw/siw/siw_main.c  | 15 +-----------
>   drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
>   4 files changed, 53 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index 86d4d6a2170e..ea5eee50dc39 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -46,6 +46,9 @@
>    */
>   #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
>   
> +/* There is always only a port 1 per siw device */
> +#define SIW_PORT 1
> +
>   struct siw_dev_cap {
>   	int max_qp;
>   	int max_qp_wr;
> @@ -69,16 +72,12 @@ struct siw_pd {
>   
>   struct siw_device {
>   	struct ib_device base_dev;
> -	struct net_device *netdev;
>   	struct siw_dev_cap attrs;
>   
>   	u32 vendor_part_id;
>   	int numa_node;
>   	char raw_gid[ETH_ALEN];
>   
> -	/* physical port state (only one port per device) */
> -	enum ib_port_state state;
> -
>   	spinlock_t lock;
>   
>   	struct xarray qp_xa;
> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
> index 86323918a570..b157bd01e70b 100644
> --- a/drivers/infiniband/sw/siw/siw_cm.c
> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   {
>   	struct socket *s;
>   	struct siw_cep *cep = NULL;
> +	struct net_device *ndev = NULL;
>   	struct siw_device *sdev = to_siw_dev(id->device);
>   	int addr_family = id->local_addr.ss_family;
>   	int rv = 0;
> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   		struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
>   
>   		/* For wildcard addr, limit binding to current device only */
> -		if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> -
> +		if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
> +			ndev = ib_device_get_netdev(id->device, SIW_PORT);
> +			if (ndev) {
> +				s->sk->sk_bound_dev_if = ndev->ifindex;
> +			} else {
> +				rv = -ENODEV;
> +				goto error;
> +			}
> +		}
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in));
>   	} else {
> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   		}
>   
>   		/* For wildcard addr, limit binding to current device only */
> -		if (ipv6_addr_any(&laddr->sin6_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> -
> +		if (ipv6_addr_any(&laddr->sin6_addr)) {
> +			ndev = ib_device_get_netdev(id->device, SIW_PORT);
> +			if (ndev) {
> +				s->sk->sk_bound_dev_if = ndev->ifindex;
> +			} else {
> +				rv = -ENODEV;
> +				goto error;
> +			}
> +		}
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in6));
>   	}
> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
>   	cep->state = SIW_EPSTATE_LISTENING;
>   
> +	if (ndev)
> +		dev_put(ndev);
> +

<...>

>   	siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
>   
>   	return 0;
> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   	}
>   	sock_release(s);
>   
> +	if (ndev)
> +		dev_put(ndev);
> +

dev_put will invoke netdev_put. In netdev_put, dev is checked.
Thus, no need to check ndev before dev_put function?

static inline void netdev_put(struct net_device *dev,
                   netdevice_tracker *tracker)
{
     if (dev) {
         netdev_tracker_free(dev, tracker);
         __dev_put(dev);
     }
}

>   	return rv;
>   }
>   
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> index 17abef48abcd..14d3103aee6f 100644
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>   		return NULL;
>   
>   	base_dev = &sdev->base_dev;
> -	sdev->netdev = netdev;
>   
>   	if (netdev->addr_len) {
>   		memcpy(sdev->raw_gid, netdev->dev_addr,
> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   
>   	switch (event) {
>   	case NETDEV_UP:
> -		sdev->state = IB_PORT_ACTIVE;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
>   		break;
>   
>   	case NETDEV_DOWN:
> -		sdev->state = IB_PORT_DOWN;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
>   		break;
>   
> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   		siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
>   		break;
>   	/*
> -	 * Todo: Below netdev events are currently not handled.
> +	 * All other events are not handled
>   	 */
> -	case NETDEV_CHANGEMTU:
> -	case NETDEV_CHANGE:
> -		break;
> -
>   	default:
>   		break;
>   	}
> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
>   	sdev = siw_device_create(netdev);
>   	if (sdev) {
>   		dev_dbg(&netdev->dev, "siw: new device\n");
> -
> -		if (netif_running(netdev) && netif_carrier_ok(netdev))
> -			sdev->state = IB_PORT_ACTIVE;
> -		else
> -			sdev->state = IB_PORT_DOWN;
> -
>   		ib_mark_name_assigned_by_user(&sdev->base_dev);
>   		rv = siw_device_register(sdev, basedev_name);
>   		if (rv)
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index 986666c19378..7ca0297d68a4 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
>   int siw_query_port(struct ib_device *base_dev, u32 port,
>   		   struct ib_port_attr *attr)
>   {
> -	struct siw_device *sdev = to_siw_dev(base_dev);
> +	struct net_device *ndev;
>   	int rv;
>   
>   	memset(attr, 0, sizeof(*attr));
>   
>   	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
>   			 &attr->active_width);
> +	if (rv)
> +		return rv;
> +
> +	ndev = ib_device_get_netdev(base_dev, SIW_PORT);
> +	if (!ndev)
> +		return -ENODEV;
> +
>   	attr->gid_tbl_len = 1;
>   	attr->max_msg_sz = -1;
> -	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> +	attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
> +	attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> +	attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
>   		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
> +	attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
> +		IB_PORT_ACTIVE : IB_PORT_DOWN;
>   	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> -	attr->state = sdev->state;
>   	/*
>   	 * All zero
>   	 *
> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
>   	 * attr->subnet_timeout = 0;
>   	 * attr->init_type_repy = 0;
>   	 */
> +	dev_put(ndev);
>   	return rv;
>   }
>   
> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
>   		 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
>   {
>   	struct siw_qp *qp;
> -	struct siw_device *sdev;
> +	struct net_device *ndev;
>   
> -	if (base_qp && qp_attr && qp_init_attr) {
> +	if (base_qp && qp_attr && qp_init_attr)
>   		qp = to_siw_qp(base_qp);
> -		sdev = to_siw_dev(base_qp->device);
> -	} else {
> +	else
>   		return -EINVAL;
> -	}
> +
> +	ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
> +	if (!ndev)
> +		return -ENODEV;
> +
>   	qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
>   	qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
>   	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
>   	qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
>   	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
>   	qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> -	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> +	qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
>   	qp_attr->max_rd_atomic = qp->attrs.irq_size;
>   	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>   
> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
>   
>   	qp_init_attr->cap = qp_attr->cap;
>   
> +	dev_put(ndev);
>   	return 0;
>   }
>
Zhu Yanjun Dec. 15, 2024, 5:37 p.m. UTC | #2
在 2024/12/14 13:37, Zhu Yanjun 写道:
> 在 2024/12/12 16:18, Bernard Metzler 写道:
>> Do not manage a per device direct link to net_device. Rely
>> on associated ib_devices net_device management, not doubling
>> the effort locally. A badly managed local link to net_device
>> was causing a 'KASAN: slab-use-after-free' exception during
>> siw_query_port() call.
>>
>> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
>> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
>> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>> ---
>>   drivers/infiniband/sw/siw/siw.h       |  7 +++---
>>   drivers/infiniband/sw/siw/siw_cm.c    | 31 +++++++++++++++++++-----
>>   drivers/infiniband/sw/siw/siw_main.c  | 15 +-----------
>>   drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
>>   4 files changed, 53 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/ 
>> siw/siw.h
>> index 86d4d6a2170e..ea5eee50dc39 100644
>> --- a/drivers/infiniband/sw/siw/siw.h
>> +++ b/drivers/infiniband/sw/siw/siw.h
>> @@ -46,6 +46,9 @@
>>    */
>>   #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
>> +/* There is always only a port 1 per siw device */
>> +#define SIW_PORT 1
>> +
>>   struct siw_dev_cap {
>>       int max_qp;
>>       int max_qp_wr;
>> @@ -69,16 +72,12 @@ struct siw_pd {
>>   struct siw_device {
>>       struct ib_device base_dev;
>> -    struct net_device *netdev;
>>       struct siw_dev_cap attrs;
>>       u32 vendor_part_id;
>>       int numa_node;
>>       char raw_gid[ETH_ALEN];
>> -    /* physical port state (only one port per device) */
>> -    enum ib_port_state state;
>> -
>>       spinlock_t lock;
>>       struct xarray qp_xa;
>> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/ 
>> sw/siw/siw_cm.c
>> index 86323918a570..b157bd01e70b 100644
>> --- a/drivers/infiniband/sw/siw/siw_cm.c
>> +++ b/drivers/infiniband/sw/siw/siw_cm.c
>> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int 
>> backlog)
>>   {
>>       struct socket *s;
>>       struct siw_cep *cep = NULL;
>> +    struct net_device *ndev = NULL;
>>       struct siw_device *sdev = to_siw_dev(id->device);
>>       int addr_family = id->local_addr.ss_family;
>>       int rv = 0;
>> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int 
>> backlog)
>>           struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
>>           /* For wildcard addr, limit binding to current device only */
>> -        if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
>> -            s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
>> -
>> +        if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
>> +            ndev = ib_device_get_netdev(id->device, SIW_PORT);
>> +            if (ndev) {
>> +                s->sk->sk_bound_dev_if = ndev->ifindex;
>> +            } else {
>> +                rv = -ENODEV;
>> +                goto error;
>> +            }
>> +        }
>>           rv = s->ops->bind(s, (struct sockaddr *)laddr,
>>                     sizeof(struct sockaddr_in));
>>       } else {
>> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int 
>> backlog)
>>           }
>>           /* For wildcard addr, limit binding to current device only */
>> -        if (ipv6_addr_any(&laddr->sin6_addr))
>> -            s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
>> -
>> +        if (ipv6_addr_any(&laddr->sin6_addr)) {
>> +            ndev = ib_device_get_netdev(id->device, SIW_PORT);
>> +            if (ndev) {
>> +                s->sk->sk_bound_dev_if = ndev->ifindex;
>> +            } else {
>> +                rv = -ENODEV;
>> +                goto error;
>> +            }
>> +        }
>>           rv = s->ops->bind(s, (struct sockaddr *)laddr,
>>                     sizeof(struct sockaddr_in6));
>>       }
>> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int 
>> backlog)
>>       list_add_tail(&cep->listenq, (struct list_head *)id- 
>> >provider_data);
>>       cep->state = SIW_EPSTATE_LISTENING;
>> +    if (ndev)
>> +        dev_put(ndev);
>> +
> 
> <...>
> 
>>       siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
>>       return 0;
>> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int 
>> backlog)
>>       }
>>       sock_release(s);
>> +    if (ndev)
>> +        dev_put(ndev);
>> +
> 
> dev_put will invoke netdev_put. In netdev_put, dev is checked.
> Thus, no need to check ndev before dev_put function?

Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>

Zhu Yanjun

> 
> static inline void netdev_put(struct net_device *dev,
>                    netdevice_tracker *tracker)
> {
>      if (dev) {
>          netdev_tracker_free(dev, tracker);
>          __dev_put(dev);
>      }
> }
> 
>>       return rv;
>>   }
>> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/ 
>> infiniband/sw/siw/siw_main.c
>> index 17abef48abcd..14d3103aee6f 100644
>> --- a/drivers/infiniband/sw/siw/siw_main.c
>> +++ b/drivers/infiniband/sw/siw/siw_main.c
>> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct 
>> net_device *netdev)
>>           return NULL;
>>       base_dev = &sdev->base_dev;
>> -    sdev->netdev = netdev;
>>       if (netdev->addr_len) {
>>           memcpy(sdev->raw_gid, netdev->dev_addr,
>> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct 
>> notifier_block *nb, unsigned long event,
>>       switch (event) {
>>       case NETDEV_UP:
>> -        sdev->state = IB_PORT_ACTIVE;
>>           siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
>>           break;
>>       case NETDEV_DOWN:
>> -        sdev->state = IB_PORT_DOWN;
>>           siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
>>           break;
>> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block 
>> *nb, unsigned long event,
>>           siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
>>           break;
>>       /*
>> -     * Todo: Below netdev events are currently not handled.
>> +     * All other events are not handled
>>        */
>> -    case NETDEV_CHANGEMTU:
>> -    case NETDEV_CHANGE:
>> -        break;
>> -
>>       default:
>>           break;
>>       }
>> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name, 
>> struct net_device *netdev)
>>       sdev = siw_device_create(netdev);
>>       if (sdev) {
>>           dev_dbg(&netdev->dev, "siw: new device\n");
>> -
>> -        if (netif_running(netdev) && netif_carrier_ok(netdev))
>> -            sdev->state = IB_PORT_ACTIVE;
>> -        else
>> -            sdev->state = IB_PORT_DOWN;
>> -
>>           ib_mark_name_assigned_by_user(&sdev->base_dev);
>>           rv = siw_device_register(sdev, basedev_name);
>>           if (rv)
>> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/ 
>> infiniband/sw/siw/siw_verbs.c
>> index 986666c19378..7ca0297d68a4 100644
>> --- a/drivers/infiniband/sw/siw/siw_verbs.c
>> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
>> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev, 
>> struct ib_device_attr *attr,
>>   int siw_query_port(struct ib_device *base_dev, u32 port,
>>              struct ib_port_attr *attr)
>>   {
>> -    struct siw_device *sdev = to_siw_dev(base_dev);
>> +    struct net_device *ndev;
>>       int rv;
>>       memset(attr, 0, sizeof(*attr));
>>       rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
>>                &attr->active_width);
>> +    if (rv)
>> +        return rv;
>> +
>> +    ndev = ib_device_get_netdev(base_dev, SIW_PORT);
>> +    if (!ndev)
>> +        return -ENODEV;
>> +
>>       attr->gid_tbl_len = 1;
>>       attr->max_msg_sz = -1;
>> -    attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> -    attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> -    attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
>> +    attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
>> +    attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
>> +    attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
>>           IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
>> +    attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
>> +        IB_PORT_ACTIVE : IB_PORT_DOWN;
>>       attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
>> -    attr->state = sdev->state;
>>       /*
>>        * All zero
>>        *
>> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32 
>> port,
>>        * attr->subnet_timeout = 0;
>>        * attr->init_type_repy = 0;
>>        */
>> +    dev_put(ndev);
>>       return rv;
>>   }
>> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct 
>> ib_qp_attr *qp_attr,
>>            int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
>>   {
>>       struct siw_qp *qp;
>> -    struct siw_device *sdev;
>> +    struct net_device *ndev;
>> -    if (base_qp && qp_attr && qp_init_attr) {
>> +    if (base_qp && qp_attr && qp_init_attr)
>>           qp = to_siw_qp(base_qp);
>> -        sdev = to_siw_dev(base_qp->device);
>> -    } else {
>> +    else
>>           return -EINVAL;
>> -    }
>> +
>> +    ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
>> +    if (!ndev)
>> +        return -ENODEV;
>> +
>>       qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
>>       qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
>>       qp_attr->cap.max_send_wr = qp->attrs.sq_size;
>>       qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
>>       qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
>>       qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
>> -    qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> +    qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
>>       qp_attr->max_rd_atomic = qp->attrs.irq_size;
>>       qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct 
>> ib_qp_attr *qp_attr,
>>       qp_init_attr->cap = qp_attr->cap;
>> +    dev_put(ndev);
>>       return 0;
>>   }
>
Leon Romanovsky Dec. 19, 2024, 10:19 a.m. UTC | #3
On Thu, 12 Dec 2024 16:18:48 +0100, Bernard Metzler wrote:
> Do not manage a per device direct link to net_device. Rely
> on associated ib_devices net_device management, not doubling
> the effort locally. A badly managed local link to net_device
> was causing a 'KASAN: slab-use-after-free' exception during
> siw_query_port() call.
> 
> 
> [...]

Applied, thanks!

[1/1] RDMA/siw: Remove direct link to net_device
      https://git.kernel.org/rdma/rdma/c/16b87037b48889

Best regards,
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 86d4d6a2170e..ea5eee50dc39 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -46,6 +46,9 @@ 
  */
 #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
 
+/* There is always only a port 1 per siw device */
+#define SIW_PORT 1
+
 struct siw_dev_cap {
 	int max_qp;
 	int max_qp_wr;
@@ -69,16 +72,12 @@  struct siw_pd {
 
 struct siw_device {
 	struct ib_device base_dev;
-	struct net_device *netdev;
 	struct siw_dev_cap attrs;
 
 	u32 vendor_part_id;
 	int numa_node;
 	char raw_gid[ETH_ALEN];
 
-	/* physical port state (only one port per device) */
-	enum ib_port_state state;
-
 	spinlock_t lock;
 
 	struct xarray qp_xa;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 86323918a570..b157bd01e70b 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1759,6 +1759,7 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 {
 	struct socket *s;
 	struct siw_cep *cep = NULL;
+	struct net_device *ndev = NULL;
 	struct siw_device *sdev = to_siw_dev(id->device);
 	int addr_family = id->local_addr.ss_family;
 	int rv = 0;
@@ -1779,9 +1780,15 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 		struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
 
 		/* For wildcard addr, limit binding to current device only */
-		if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
-			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+		if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
+			ndev = ib_device_get_netdev(id->device, SIW_PORT);
+			if (ndev) {
+				s->sk->sk_bound_dev_if = ndev->ifindex;
+			} else {
+				rv = -ENODEV;
+				goto error;
+			}
+		}
 		rv = s->ops->bind(s, (struct sockaddr *)laddr,
 				  sizeof(struct sockaddr_in));
 	} else {
@@ -1797,9 +1804,15 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 		}
 
 		/* For wildcard addr, limit binding to current device only */
-		if (ipv6_addr_any(&laddr->sin6_addr))
-			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+		if (ipv6_addr_any(&laddr->sin6_addr)) {
+			ndev = ib_device_get_netdev(id->device, SIW_PORT);
+			if (ndev) {
+				s->sk->sk_bound_dev_if = ndev->ifindex;
+			} else {
+				rv = -ENODEV;
+				goto error;
+			}
+		}
 		rv = s->ops->bind(s, (struct sockaddr *)laddr,
 				  sizeof(struct sockaddr_in6));
 	}
@@ -1861,6 +1874,9 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
 	cep->state = SIW_EPSTATE_LISTENING;
 
+	if (ndev)
+		dev_put(ndev);
+
 	siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
 
 	return 0;
@@ -1880,6 +1896,9 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 	}
 	sock_release(s);
 
+	if (ndev)
+		dev_put(ndev);
+
 	return rv;
 }
 
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 17abef48abcd..14d3103aee6f 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -287,7 +287,6 @@  static struct siw_device *siw_device_create(struct net_device *netdev)
 		return NULL;
 
 	base_dev = &sdev->base_dev;
-	sdev->netdev = netdev;
 
 	if (netdev->addr_len) {
 		memcpy(sdev->raw_gid, netdev->dev_addr,
@@ -381,12 +380,10 @@  static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
 
 	switch (event) {
 	case NETDEV_UP:
-		sdev->state = IB_PORT_ACTIVE;
 		siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
 		break;
 
 	case NETDEV_DOWN:
-		sdev->state = IB_PORT_DOWN;
 		siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
 		break;
 
@@ -407,12 +404,8 @@  static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
 		siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
 		break;
 	/*
-	 * Todo: Below netdev events are currently not handled.
+	 * All other events are not handled
 	 */
-	case NETDEV_CHANGEMTU:
-	case NETDEV_CHANGE:
-		break;
-
 	default:
 		break;
 	}
@@ -442,12 +435,6 @@  static int siw_newlink(const char *basedev_name, struct net_device *netdev)
 	sdev = siw_device_create(netdev);
 	if (sdev) {
 		dev_dbg(&netdev->dev, "siw: new device\n");
-
-		if (netif_running(netdev) && netif_carrier_ok(netdev))
-			sdev->state = IB_PORT_ACTIVE;
-		else
-			sdev->state = IB_PORT_DOWN;
-
 		ib_mark_name_assigned_by_user(&sdev->base_dev);
 		rv = siw_device_register(sdev, basedev_name);
 		if (rv)
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 986666c19378..7ca0297d68a4 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -171,21 +171,29 @@  int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
 int siw_query_port(struct ib_device *base_dev, u32 port,
 		   struct ib_port_attr *attr)
 {
-	struct siw_device *sdev = to_siw_dev(base_dev);
+	struct net_device *ndev;
 	int rv;
 
 	memset(attr, 0, sizeof(*attr));
 
 	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
 			 &attr->active_width);
+	if (rv)
+		return rv;
+
+	ndev = ib_device_get_netdev(base_dev, SIW_PORT);
+	if (!ndev)
+		return -ENODEV;
+
 	attr->gid_tbl_len = 1;
 	attr->max_msg_sz = -1;
-	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+	attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
+	attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
+	attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
 		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
+	attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
+		IB_PORT_ACTIVE : IB_PORT_DOWN;
 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
-	attr->state = sdev->state;
 	/*
 	 * All zero
 	 *
@@ -199,6 +207,7 @@  int siw_query_port(struct ib_device *base_dev, u32 port,
 	 * attr->subnet_timeout = 0;
 	 * attr->init_type_repy = 0;
 	 */
+	dev_put(ndev);
 	return rv;
 }
 
@@ -505,21 +514,24 @@  int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
 {
 	struct siw_qp *qp;
-	struct siw_device *sdev;
+	struct net_device *ndev;
 
-	if (base_qp && qp_attr && qp_init_attr) {
+	if (base_qp && qp_attr && qp_init_attr)
 		qp = to_siw_qp(base_qp);
-		sdev = to_siw_dev(base_qp->device);
-	} else {
+	else
 		return -EINVAL;
-	}
+
+	ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
+	if (!ndev)
+		return -ENODEV;
+
 	qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
 	qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
 	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
 	qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
 	qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
-	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+	qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
 
@@ -534,6 +546,7 @@  int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
 
 	qp_init_attr->cap = qp_attr->cap;
 
+	dev_put(ndev);
 	return 0;
 }