Message ID | 20231228014633.3256862-3-dw@davidwei.uk (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | netdevsim: link and forward skbs between ports | expand |
Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote: >Add a debugfs file in >/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer > >Writing "M B" to this file will link port A of netdevsim N with port B >of netdevsim M. Reading this file will return the linked netdevsim id >and port, if any. > >During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent >modifications to nsim_dev and peer's devlink->lock prevents concurrent >modifications to the peer's port_list. rtnl_lock ensures netdevices do >not change during the critical section where a link is established. > >The lock order is consistent with other parts that touch netdevsim and >should not deadlock. > >During nsim_dev_peer_read(), RCU read critical section ensures valid >values even if stale. > >Signed-off-by: David Wei <dw@davidwei.uk> >--- > drivers/net/netdevsim/dev.c | 134 +++++++++++++++++++++++++++--- > drivers/net/netdevsim/netdev.c | 6 ++ > drivers/net/netdevsim/netdevsim.h | 1 + > 3 files changed, 128 insertions(+), 13 deletions(-) > >diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >index 8d477aa99f94..6d5e4ce08dfd 100644 >--- a/drivers/net/netdevsim/dev.c >+++ b/drivers/net/netdevsim/dev.c >@@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = { > .owner = THIS_MODULE, > }; > >+static struct nsim_dev *nsim_dev_find_by_id(unsigned int id) >+{ >+ struct nsim_dev *dev; >+ >+ list_for_each_entry(dev, &nsim_dev_list, list) >+ if (dev->nsim_bus_dev->dev.id == id) >+ return dev; >+ >+ return NULL; >+} >+ >+static struct nsim_dev_port * >+__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >+ unsigned int port_index) >+{ >+ struct nsim_dev_port *nsim_dev_port; >+ >+ port_index = nsim_dev_port_index(type, port_index); >+ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) >+ if (nsim_dev_port->port_index == port_index) >+ return nsim_dev_port; >+ return NULL; >+} >+ >+static ssize_t 
nsim_dev_peer_read(struct file *file, char __user *data, >+ size_t count, loff_t *ppos) >+{ >+ struct nsim_dev_port *nsim_dev_port; >+ struct netdevsim *peer; >+ unsigned int id, port; >+ ssize_t ret = 0; >+ char buf[23]; >+ >+ nsim_dev_port = file->private_data; >+ rcu_read_lock(); >+ peer = rcu_dereference(nsim_dev_port->ns->peer); >+ if (!peer) { >+ rcu_read_unlock(); >+ return 0; >+ } >+ >+ id = peer->nsim_bus_dev->dev.id; >+ port = peer->nsim_dev_port->port_index; >+ ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >+ ret = simple_read_from_buffer(data, count, ppos, buf, ret); >+ >+ rcu_read_unlock(); >+ return ret; >+} >+ >+static ssize_t nsim_dev_peer_write(struct file *file, >+ const char __user *data, >+ size_t count, loff_t *ppos) >+{ >+ struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >+ struct nsim_dev *peer_dev; >+ unsigned int id, port; >+ char buf[22]; >+ ssize_t ret; >+ >+ if (count >= sizeof(buf)) >+ return -ENOSPC; >+ >+ ret = copy_from_user(buf, data, count); >+ if (ret) >+ return -EFAULT; >+ buf[count] = '\0'; >+ >+ ret = sscanf(buf, "%u %u", &id, &port); >+ if (ret != 2) { >+ pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); >+ return -EINVAL; >+ } >+ >+ ret = -EINVAL; >+ mutex_lock(&nsim_dev_list_lock); >+ peer_dev = nsim_dev_find_by_id(id); >+ if (!peer_dev) { >+ pr_err("Peer netdevsim %u does not exist\n", id); >+ goto out_mutex; >+ } >+ >+ devl_lock(priv_to_devlink(peer_dev)); Why exactly do you take devlink instance mutex of the peer here? 
>+ rtnl_lock(); >+ nsim_dev_port = file->private_data; >+ peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, >+ port); >+ if (!peer_dev_port) { >+ pr_err("Peer netdevsim %u port %u does not exist\n", id, port); >+ goto out_devl; >+ } >+ >+ if (nsim_dev_port == peer_dev_port) { >+ pr_err("Cannot link netdevsim to itself\n"); >+ goto out_devl; >+ } >+ >+ rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >+ rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >+ ret = count; >+ >+out_devl: >+ rtnl_unlock(); >+ devl_unlock(priv_to_devlink(peer_dev)); >+out_mutex: >+ mutex_unlock(&nsim_dev_list_lock); >+ >+ return ret; >+} >+ >+static const struct file_operations nsim_dev_peer_fops = { >+ .open = simple_open, >+ .read = nsim_dev_peer_read, >+ .write = nsim_dev_peer_write, >+ .llseek = generic_file_llseek, >+ .owner = THIS_MODULE, >+}; >+ > static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, > struct nsim_dev_port *nsim_dev_port) > { >@@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, > } > debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); > >+ debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >+ nsim_dev_port, &nsim_dev_peer_fops); >+ > return 0; > } > >@@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) > dev_set_drvdata(&nsim_bus_dev->dev, NULL); > } > >-static struct nsim_dev_port * >-__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >- unsigned int port_index) >-{ >- struct nsim_dev_port *nsim_dev_port; >- >- port_index = nsim_dev_port_index(type, port_index); >- list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) >- if (nsim_dev_port->port_index == port_index) >- return nsim_dev_port; >- return NULL; >-} >- > int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, > unsigned int port_index) > { >diff --git a/drivers/net/netdevsim/netdev.c 
b/drivers/net/netdevsim/netdev.c >index aecaf5f44374..434322f6a565 100644 >--- a/drivers/net/netdevsim/netdev.c >+++ b/drivers/net/netdevsim/netdev.c >@@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) > ns->nsim_dev = nsim_dev; > ns->nsim_dev_port = nsim_dev_port; > ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >+ RCU_INIT_POINTER(ns->peer, NULL); > SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); > SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); > nsim_ethtool_init(ns); >@@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) > void nsim_destroy(struct netdevsim *ns) > { > struct net_device *dev = ns->netdev; >+ struct netdevsim *peer; > > rtnl_lock(); >+ peer = rtnl_dereference(ns->peer); >+ if (peer) >+ RCU_INIT_POINTER(peer->peer, NULL); >+ RCU_INIT_POINTER(ns->peer, NULL); > unregister_netdevice(dev); > if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { > nsim_macsec_teardown(ns); >diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >index babb61d7790b..24fc3fbda791 100644 >--- a/drivers/net/netdevsim/netdevsim.h >+++ b/drivers/net/netdevsim/netdevsim.h >@@ -125,6 +125,7 @@ struct netdevsim { > } udp_ports; > > struct nsim_ethtool ethtool; >+ struct netdevsim __rcu *peer; > }; > > struct netdevsim * >-- >2.39.3 >
On 2024-01-02 03:11, Jiri Pirko wrote: > Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote: >> Add a debugfs file in >> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer >> >> Writing "M B" to this file will link port A of netdevsim N with port B >> of netdevsim M. Reading this file will return the linked netdevsim id >> and port, if any. >> >> During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent >> modifications to nsim_dev and peer's devlink->lock prevents concurrent >> modifications to the peer's port_list. rtnl_lock ensures netdevices do >> not change during the critical section where a link is established. >> >> The lock order is consistent with other parts that touch netdevsim and >> should not deadlock. >> >> During nsim_dev_peer_read(), RCU read critical section ensures valid >> values even if stale. >> >> Signed-off-by: David Wei <dw@davidwei.uk> >> --- >> drivers/net/netdevsim/dev.c | 134 +++++++++++++++++++++++++++--- >> drivers/net/netdevsim/netdev.c | 6 ++ >> drivers/net/netdevsim/netdevsim.h | 1 + >> 3 files changed, 128 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >> index 8d477aa99f94..6d5e4ce08dfd 100644 >> --- a/drivers/net/netdevsim/dev.c >> +++ b/drivers/net/netdevsim/dev.c >> @@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = { >> .owner = THIS_MODULE, >> }; >> >> +static struct nsim_dev *nsim_dev_find_by_id(unsigned int id) >> +{ >> + struct nsim_dev *dev; >> + >> + list_for_each_entry(dev, &nsim_dev_list, list) >> + if (dev->nsim_bus_dev->dev.id == id) >> + return dev; >> + >> + return NULL; >> +} >> + >> +static struct nsim_dev_port * >> +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >> + unsigned int port_index) >> +{ >> + struct nsim_dev_port *nsim_dev_port; >> + >> + port_index = nsim_dev_port_index(type, port_index); >> + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, 
list) >> + if (nsim_dev_port->port_index == port_index) >> + return nsim_dev_port; >> + return NULL; >> +} >> + >> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >> + size_t count, loff_t *ppos) >> +{ >> + struct nsim_dev_port *nsim_dev_port; >> + struct netdevsim *peer; >> + unsigned int id, port; >> + ssize_t ret = 0; >> + char buf[23]; >> + >> + nsim_dev_port = file->private_data; >> + rcu_read_lock(); >> + peer = rcu_dereference(nsim_dev_port->ns->peer); >> + if (!peer) { >> + rcu_read_unlock(); >> + return 0; >> + } >> + >> + id = peer->nsim_bus_dev->dev.id; >> + port = peer->nsim_dev_port->port_index; >> + ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >> + ret = simple_read_from_buffer(data, count, ppos, buf, ret); >> + >> + rcu_read_unlock(); >> + return ret; >> +} >> + >> +static ssize_t nsim_dev_peer_write(struct file *file, >> + const char __user *data, >> + size_t count, loff_t *ppos) >> +{ >> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >> + struct nsim_dev *peer_dev; >> + unsigned int id, port; >> + char buf[22]; >> + ssize_t ret; >> + >> + if (count >= sizeof(buf)) >> + return -ENOSPC; >> + >> + ret = copy_from_user(buf, data, count); >> + if (ret) >> + return -EFAULT; >> + buf[count] = '\0'; >> + >> + ret = sscanf(buf, "%u %u", &id, &port); >> + if (ret != 2) { >> + pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); >> + return -EINVAL; >> + } >> + >> + ret = -EINVAL; >> + mutex_lock(&nsim_dev_list_lock); >> + peer_dev = nsim_dev_find_by_id(id); >> + if (!peer_dev) { >> + pr_err("Peer netdevsim %u does not exist\n", id); >> + goto out_mutex; >> + } >> + >> + devl_lock(priv_to_devlink(peer_dev)); > > Why exactly do you take devlink instance mutex of the peer here? To make sure that the port list does not change. Ports can be added or removed at will via nsim_drv_port_add() and nsim_drv_port_del(), which both take the devlink lock. 
> > >> + rtnl_lock(); >> + nsim_dev_port = file->private_data; >> + peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, >> + port); >> + if (!peer_dev_port) { >> + pr_err("Peer netdevsim %u port %u does not exist\n", id, port); >> + goto out_devl; >> + } >> + >> + if (nsim_dev_port == peer_dev_port) { >> + pr_err("Cannot link netdevsim to itself\n"); >> + goto out_devl; >> + } >> + >> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >> + ret = count; >> + >> +out_devl: >> + rtnl_unlock(); >> + devl_unlock(priv_to_devlink(peer_dev)); >> +out_mutex: >> + mutex_unlock(&nsim_dev_list_lock); >> + >> + return ret; >> +} >> + >> +static const struct file_operations nsim_dev_peer_fops = { >> + .open = simple_open, >> + .read = nsim_dev_peer_read, >> + .write = nsim_dev_peer_write, >> + .llseek = generic_file_llseek, >> + .owner = THIS_MODULE, >> +}; >> + >> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >> struct nsim_dev_port *nsim_dev_port) >> { >> @@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >> } >> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); >> >> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >> + nsim_dev_port, &nsim_dev_peer_fops); >> + >> return 0; >> } >> >> @@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) >> dev_set_drvdata(&nsim_bus_dev->dev, NULL); >> } >> >> -static struct nsim_dev_port * >> -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >> - unsigned int port_index) >> -{ >> - struct nsim_dev_port *nsim_dev_port; >> - >> - port_index = nsim_dev_port_index(type, port_index); >> - list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) >> - if (nsim_dev_port->port_index == port_index) >> - return nsim_dev_port; >> - return NULL; >> -} >> - >> int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, 
enum nsim_dev_port_type type, >> unsigned int port_index) >> { >> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >> index aecaf5f44374..434322f6a565 100644 >> --- a/drivers/net/netdevsim/netdev.c >> +++ b/drivers/net/netdevsim/netdev.c >> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >> ns->nsim_dev = nsim_dev; >> ns->nsim_dev_port = nsim_dev_port; >> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >> + RCU_INIT_POINTER(ns->peer, NULL); >> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); >> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); >> nsim_ethtool_init(ns); >> @@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >> void nsim_destroy(struct netdevsim *ns) >> { >> struct net_device *dev = ns->netdev; >> + struct netdevsim *peer; >> >> rtnl_lock(); >> + peer = rtnl_dereference(ns->peer); >> + if (peer) >> + RCU_INIT_POINTER(peer->peer, NULL); >> + RCU_INIT_POINTER(ns->peer, NULL); >> unregister_netdevice(dev); >> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { >> nsim_macsec_teardown(ns); >> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >> index babb61d7790b..24fc3fbda791 100644 >> --- a/drivers/net/netdevsim/netdevsim.h >> +++ b/drivers/net/netdevsim/netdevsim.h >> @@ -125,6 +125,7 @@ struct netdevsim { >> } udp_ports; >> >> struct nsim_ethtool ethtool; >> + struct netdevsim __rcu *peer; >> }; >> >> struct netdevsim * >> -- >> 2.39.3 >>
On Wed, 27 Dec 2023 17:46:30 -0800 David Wei wrote: > +static ssize_t nsim_dev_peer_write(struct file *file, > + const char __user *data, > + size_t count, loff_t *ppos) > +{ > + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; > + struct nsim_dev *peer_dev; > + unsigned int id, port; > + char buf[22]; > + ssize_t ret; > + > + if (count >= sizeof(buf)) > + return -ENOSPC; > + > + ret = copy_from_user(buf, data, count); > + if (ret) > + return -EFAULT; > + buf[count] = '\0'; > + > + ret = sscanf(buf, "%u %u", &id, &port); > + if (ret != 2) { > + pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); netif_err() or dev_err() ? Granted the rest of the file seems to use pr_err(), but I'm not sure why... > + return -EINVAL; > + } Could you put a sleep() here and test removing the device while some thread is stuck here? I don't recall exactly but I thought debugfs remove waits for concurrent reads and writes which could be problematic given we take all the locks under the sun here.. 
> + ret = -EINVAL; > + mutex_lock(&nsim_dev_list_lock); > + peer_dev = nsim_dev_find_by_id(id); > + if (!peer_dev) { > + pr_err("Peer netdevsim %u does not exist\n", id); > + goto out_mutex; > + } > + > + devl_lock(priv_to_devlink(peer_dev)); > + rtnl_lock(); > + nsim_dev_port = file->private_data; > + peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, > + port); > + if (!peer_dev_port) { > + pr_err("Peer netdevsim %u port %u does not exist\n", id, port); > + goto out_devl; > + } > + > + if (nsim_dev_port == peer_dev_port) { > + pr_err("Cannot link netdevsim to itself\n"); > + goto out_devl; > + } > + > + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); > + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); > + ret = count; > + > +out_devl: out_unlock_rtnl > + rtnl_unlock(); > + devl_unlock(priv_to_devlink(peer_dev)); > +out_mutex: out_unlock_dev_list > + mutex_unlock(&nsim_dev_list_lock); > + > + return ret; > +} > + > +static const struct file_operations nsim_dev_peer_fops = { > + .open = simple_open, > + .read = nsim_dev_peer_read, > + .write = nsim_dev_peer_write, > + .llseek = generic_file_llseek, You don't support seek, you want some form of no_seek here. > + .owner = THIS_MODULE, > +};
Wed, Jan 03, 2024 at 10:56:36PM CET, dw@davidwei.uk wrote: >On 2024-01-02 03:11, Jiri Pirko wrote: >> Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote: >>> Add a debugfs file in >>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer >>> >>> Writing "M B" to this file will link port A of netdevsim N with port B >>> of netdevsim M. Reading this file will return the linked netdevsim id >>> and port, if any. >>> >>> During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent >>> modifications to nsim_dev and peer's devlink->lock prevents concurrent >>> modifications to the peer's port_list. rtnl_lock ensures netdevices do >>> not change during the critical section where a link is established. >>> >>> The lock order is consistent with other parts that touch netdevsim and >>> should not deadlock. >>> >>> During nsim_dev_peer_read(), RCU read critical section ensures valid >>> values even if stale. >>> >>> Signed-off-by: David Wei <dw@davidwei.uk> >>> --- >>> drivers/net/netdevsim/dev.c | 134 +++++++++++++++++++++++++++--- >>> drivers/net/netdevsim/netdev.c | 6 ++ >>> drivers/net/netdevsim/netdevsim.h | 1 + >>> 3 files changed, 128 insertions(+), 13 deletions(-) >>> >>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >>> index 8d477aa99f94..6d5e4ce08dfd 100644 >>> --- a/drivers/net/netdevsim/dev.c >>> +++ b/drivers/net/netdevsim/dev.c >>> @@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = { >>> .owner = THIS_MODULE, >>> }; >>> >>> +static struct nsim_dev *nsim_dev_find_by_id(unsigned int id) >>> +{ >>> + struct nsim_dev *dev; >>> + >>> + list_for_each_entry(dev, &nsim_dev_list, list) >>> + if (dev->nsim_bus_dev->dev.id == id) >>> + return dev; >>> + >>> + return NULL; >>> +} >>> + >>> +static struct nsim_dev_port * >>> +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >>> + unsigned int port_index) >>> +{ >>> + struct nsim_dev_port *nsim_dev_port; >>> + >>> + 
port_index = nsim_dev_port_index(type, port_index); >>> + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) >>> + if (nsim_dev_port->port_index == port_index) >>> + return nsim_dev_port; >>> + return NULL; >>> +} >>> + >>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >>> + size_t count, loff_t *ppos) >>> +{ >>> + struct nsim_dev_port *nsim_dev_port; >>> + struct netdevsim *peer; >>> + unsigned int id, port; >>> + ssize_t ret = 0; >>> + char buf[23]; >>> + >>> + nsim_dev_port = file->private_data; >>> + rcu_read_lock(); >>> + peer = rcu_dereference(nsim_dev_port->ns->peer); >>> + if (!peer) { >>> + rcu_read_unlock(); >>> + return 0; >>> + } >>> + >>> + id = peer->nsim_bus_dev->dev.id; >>> + port = peer->nsim_dev_port->port_index; >>> + ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >>> + ret = simple_read_from_buffer(data, count, ppos, buf, ret); >>> + >>> + rcu_read_unlock(); >>> + return ret; >>> +} >>> + >>> +static ssize_t nsim_dev_peer_write(struct file *file, >>> + const char __user *data, >>> + size_t count, loff_t *ppos) >>> +{ >>> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >>> + struct nsim_dev *peer_dev; >>> + unsigned int id, port; >>> + char buf[22]; >>> + ssize_t ret; >>> + >>> + if (count >= sizeof(buf)) >>> + return -ENOSPC; >>> + >>> + ret = copy_from_user(buf, data, count); >>> + if (ret) >>> + return -EFAULT; >>> + buf[count] = '\0'; >>> + >>> + ret = sscanf(buf, "%u %u", &id, &port); >>> + if (ret != 2) { >>> + pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); >>> + return -EINVAL; >>> + } >>> + >>> + ret = -EINVAL; >>> + mutex_lock(&nsim_dev_list_lock); >>> + peer_dev = nsim_dev_find_by_id(id); >>> + if (!peer_dev) { >>> + pr_err("Peer netdevsim %u does not exist\n", id); >>> + goto out_mutex; >>> + } >>> + >>> + devl_lock(priv_to_devlink(peer_dev)); >> >> Why exactly do you take devlink instance mutex of the peer here? > >To make sure that port list do not change. 
Ports can be added or removed >at will from nsim_drv_port_add() and nsim_drv_port_del() which both take >the devlink lock. Ok. > >> >> >>> + rtnl_lock(); >>> + nsim_dev_port = file->private_data; >>> + peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, >>> + port); >>> + if (!peer_dev_port) { >>> + pr_err("Peer netdevsim %u port %u does not exist\n", id, port); >>> + goto out_devl; >>> + } >>> + >>> + if (nsim_dev_port == peer_dev_port) { >>> + pr_err("Cannot link netdevsim to itself\n"); >>> + goto out_devl; >>> + } >>> + >>> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >>> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >>> + ret = count; >>> + >>> +out_devl: >>> + rtnl_unlock(); >>> + devl_unlock(priv_to_devlink(peer_dev)); >>> +out_mutex: >>> + mutex_unlock(&nsim_dev_list_lock); >>> + >>> + return ret; >>> +} >>> + >>> +static const struct file_operations nsim_dev_peer_fops = { >>> + .open = simple_open, >>> + .read = nsim_dev_peer_read, >>> + .write = nsim_dev_peer_write, >>> + .llseek = generic_file_llseek, >>> + .owner = THIS_MODULE, >>> +}; >>> + >>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>> struct nsim_dev_port *nsim_dev_port) >>> { >>> @@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>> } >>> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); >>> >>> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >>> + nsim_dev_port, &nsim_dev_peer_fops); >>> + >>> return 0; >>> } >>> >>> @@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) >>> dev_set_drvdata(&nsim_bus_dev->dev, NULL); >>> } >>> >>> -static struct nsim_dev_port * >>> -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, >>> - unsigned int port_index) >>> -{ >>> - struct nsim_dev_port *nsim_dev_port; >>> - >>> - port_index = nsim_dev_port_index(type, port_index); >>> - list_for_each_entry(nsim_dev_port, 
&nsim_dev->port_list, list) >>> - if (nsim_dev_port->port_index == port_index) >>> - return nsim_dev_port; >>> - return NULL; >>> -} >>> - >>> int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, >>> unsigned int port_index) >>> { >>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >>> index aecaf5f44374..434322f6a565 100644 >>> --- a/drivers/net/netdevsim/netdev.c >>> +++ b/drivers/net/netdevsim/netdev.c >>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>> ns->nsim_dev = nsim_dev; >>> ns->nsim_dev_port = nsim_dev_port; >>> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >>> + RCU_INIT_POINTER(ns->peer, NULL); >>> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); >>> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); >>> nsim_ethtool_init(ns); >>> @@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>> void nsim_destroy(struct netdevsim *ns) >>> { >>> struct net_device *dev = ns->netdev; >>> + struct netdevsim *peer; >>> >>> rtnl_lock(); >>> + peer = rtnl_dereference(ns->peer); >>> + if (peer) >>> + RCU_INIT_POINTER(peer->peer, NULL); >>> + RCU_INIT_POINTER(ns->peer, NULL); >>> unregister_netdevice(dev); >>> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { >>> nsim_macsec_teardown(ns); >>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >>> index babb61d7790b..24fc3fbda791 100644 >>> --- a/drivers/net/netdevsim/netdevsim.h >>> +++ b/drivers/net/netdevsim/netdevsim.h >>> @@ -125,6 +125,7 @@ struct netdevsim { >>> } udp_ports; >>> >>> struct nsim_ethtool ethtool; >>> + struct netdevsim __rcu *peer; >>> }; >>> >>> struct netdevsim * >>> -- >>> 2.39.3 >>>
On 2024-01-03 17:39, Jakub Kicinski wrote: > On Wed, 27 Dec 2023 17:46:30 -0800 David Wei wrote: >> +static ssize_t nsim_dev_peer_write(struct file *file, >> + const char __user *data, >> + size_t count, loff_t *ppos) >> +{ >> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >> + struct nsim_dev *peer_dev; >> + unsigned int id, port; >> + char buf[22]; >> + ssize_t ret; >> + >> + if (count >= sizeof(buf)) >> + return -ENOSPC; >> + >> + ret = copy_from_user(buf, data, count); >> + if (ret) >> + return -EFAULT; >> + buf[count] = '\0'; >> + >> + ret = sscanf(buf, "%u %u", &id, &port); >> + if (ret != 2) { >> + pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); > > netif_err() or dev_err() ? Granted the rest of the file seems to use > pr_err(), but I'm not sure why... I can change it to use one of these two in this patchset, then I can change the others separately in another patch. How does that sound? > >> + return -EINVAL; >> + } > > Could you put a sleep() here and test removing the device while some > thread is stuck here? I don't recall exactly but I thought debugfs > remove waits for concurrent reads and writes which could be problematic > given we take all the locks under the sun here.. Yep, I'll test this. 
> >> + ret = -EINVAL; >> + mutex_lock(&nsim_dev_list_lock); >> + peer_dev = nsim_dev_find_by_id(id); >> + if (!peer_dev) { >> + pr_err("Peer netdevsim %u does not exist\n", id); >> + goto out_mutex; >> + } >> + >> + devl_lock(priv_to_devlink(peer_dev)); >> + rtnl_lock(); >> + nsim_dev_port = file->private_data; >> + peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, >> + port); >> + if (!peer_dev_port) { >> + pr_err("Peer netdevsim %u port %u does not exist\n", id, port); >> + goto out_devl; >> + } >> + >> + if (nsim_dev_port == peer_dev_port) { >> + pr_err("Cannot link netdevsim to itself\n"); >> + goto out_devl; >> + } >> + >> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >> + ret = count; >> + >> +out_devl: > > out_unlock_rtnl > >> + rtnl_unlock(); >> + devl_unlock(priv_to_devlink(peer_dev)); >> +out_mutex: > > out_unlock_dev_list > >> + mutex_unlock(&nsim_dev_list_lock); >> + >> + return ret; >> +} >> + >> +static const struct file_operations nsim_dev_peer_fops = { >> + .open = simple_open, >> + .read = nsim_dev_peer_read, >> + .write = nsim_dev_peer_write, >> + .llseek = generic_file_llseek, > > You don't support seek, you want some form of no_seek here. > >> + .owner = THIS_MODULE, >> +};
On Tue, 9 Jan 2024 08:57:59 -0800 David Wei wrote: > >> + ret = sscanf(buf, "%u %u", &id, &port); > >> + if (ret != 2) { > >> + pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); > > > > netif_err() or dev_err() ? Granted the rest of the file seems to use > > pr_err(), but I'm not sure why... > > I can change it to use one of these two in this patchset, then I can > change the others separately in another patch. How does that sound? Separate patch and separate series. Let's not load more unrelated patches into this series :)
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 8d477aa99f94..6d5e4ce08dfd 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = { .owner = THIS_MODULE, }; +static struct nsim_dev *nsim_dev_find_by_id(unsigned int id) +{ + struct nsim_dev *dev; + + list_for_each_entry(dev, &nsim_dev_list, list) + if (dev->nsim_bus_dev->dev.id == id) + return dev; + + return NULL; +} + +static struct nsim_dev_port * +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, + unsigned int port_index) +{ + struct nsim_dev_port *nsim_dev_port; + + port_index = nsim_dev_port_index(type, port_index); + list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) + if (nsim_dev_port->port_index == port_index) + return nsim_dev_port; + return NULL; +} + +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_port *nsim_dev_port; + struct netdevsim *peer; + unsigned int id, port; + ssize_t ret = 0; + char buf[23]; + + nsim_dev_port = file->private_data; + rcu_read_lock(); + peer = rcu_dereference(nsim_dev_port->ns->peer); + if (!peer) { + rcu_read_unlock(); + return 0; + } + + id = peer->nsim_bus_dev->dev.id; + port = peer->nsim_dev_port->port_index; + ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); + ret = simple_read_from_buffer(data, count, ppos, buf, ret); + + rcu_read_unlock(); + return ret; +} + +static ssize_t nsim_dev_peer_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; + struct nsim_dev *peer_dev; + unsigned int id, port; + char buf[22]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -ENOSPC; + + ret = copy_from_user(buf, data, count); + if (ret) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &id, &port); + if (ret != 2) { + 
pr_err("Format is peer netdevsim \"id port\" (uint uint)\n"); + return -EINVAL; + } + + ret = -EINVAL; + mutex_lock(&nsim_dev_list_lock); + peer_dev = nsim_dev_find_by_id(id); + if (!peer_dev) { + pr_err("Peer netdevsim %u does not exist\n", id); + goto out_mutex; + } + + devl_lock(priv_to_devlink(peer_dev)); + rtnl_lock(); + nsim_dev_port = file->private_data; + peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF, + port); + if (!peer_dev_port) { + pr_err("Peer netdevsim %u port %u does not exist\n", id, port); + goto out_devl; + } + + if (nsim_dev_port == peer_dev_port) { + pr_err("Cannot link netdevsim to itself\n"); + goto out_devl; + } + + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); + ret = count; + +out_devl: + rtnl_unlock(); + devl_unlock(priv_to_devlink(peer_dev)); +out_mutex: + mutex_unlock(&nsim_dev_list_lock); + + return ret; +} + +static const struct file_operations nsim_dev_peer_fops = { + .open = simple_open, + .read = nsim_dev_peer_read, + .write = nsim_dev_peer_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, + nsim_dev_port, &nsim_dev_peer_fops); + return 0; } @@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) dev_set_drvdata(&nsim_bus_dev->dev, NULL); } -static struct nsim_dev_port * -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type, - unsigned int port_index) -{ - struct nsim_dev_port *nsim_dev_port; - - port_index = nsim_dev_port_index(type, port_index); - list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) - if (nsim_dev_port->port_index == 
port_index) - return nsim_dev_port; - return NULL; -} - int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type, unsigned int port_index) { diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index aecaf5f44374..434322f6a565 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; + RCU_INIT_POINTER(ns->peer, NULL); SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); nsim_ethtool_init(ns); @@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; + struct netdevsim *peer; rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index babb61d7790b..24fc3fbda791 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -125,6 +125,7 @@ struct netdevsim { } udp_ports; struct nsim_ethtool ethtool; + struct netdevsim __rcu *peer; }; struct netdevsim *
Add a debugfs file in /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer Writing "M B" to this file will link port A of netdevsim N with port B of netdevsim M. Reading this file will return the linked netdevsim id and port, if any. During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent modifications to nsim_dev and peer's devlink->lock prevents concurrent modifications to the peer's port_list. rtnl_lock ensures netdevices do not change during the critical section where a link is established. The lock order is consistent with other parts that touch netdevsim and should not deadlock. During nsim_dev_peer_read(), RCU read critical section ensures valid values even if stale. Signed-off-by: David Wei <dw@davidwei.uk> --- drivers/net/netdevsim/dev.c | 134 +++++++++++++++++++++++++++--- drivers/net/netdevsim/netdev.c | 6 ++ drivers/net/netdevsim/netdevsim.h | 1 + 3 files changed, 128 insertions(+), 13 deletions(-)