diff mbox series

[net-next,v5,2/5] netdevsim: allow two netdevsim ports to be connected

Message ID 20231228014633.3256862-3-dw@davidwei.uk (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series netdevsim: link and forward skbs between ports | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1113 this patch: 1113
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 1140 this patch: 1140
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1140 this patch: 1140
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 179 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

David Wei Dec. 28, 2023, 1:46 a.m. UTC
Add a debugfs file in
/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer

Writing "M B" to this file will link port A of netdevsim N with port B
of netdevsim M. Reading this file will return the linked netdevsim id
and port, if any.

During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent
modifications to nsim_dev and peer's devlink->lock prevents concurrent
modifications to the peer's port_list. rtnl_lock ensures netdevices do
not change during the critical section where a link is established.

The lock order is consistent with other parts that touch netdevsim and
should not deadlock.

During nsim_dev_peer_read(), RCU read critical section ensures valid
values even if stale.

Signed-off-by: David Wei <dw@davidwei.uk>
---
 drivers/net/netdevsim/dev.c       | 134 +++++++++++++++++++++++++++---
 drivers/net/netdevsim/netdev.c    |   6 ++
 drivers/net/netdevsim/netdevsim.h |   1 +
 3 files changed, 128 insertions(+), 13 deletions(-)

Comments

Jiri Pirko Jan. 2, 2024, 11:11 a.m. UTC | #1
Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote:
>Add a debugfs file in
>/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>
>Writing "M B" to this file will link port A of netdevsim N with port B
>of netdevsim M. Reading this file will return the linked netdevsim id
>and port, if any.
>
>During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent
>modifications to nsim_dev and peer's devlink->lock prevents concurrent
>modifications to the peer's port_list. rtnl_lock ensures netdevices do
>not change during the critical section where a link is established.
>
>The lock order is consistent with other parts that touch netdevsim and
>should not deadlock.
>
>During nsim_dev_peer_read(), RCU read critical section ensures valid
>values even if stale.
>
>Signed-off-by: David Wei <dw@davidwei.uk>
>---
> drivers/net/netdevsim/dev.c       | 134 +++++++++++++++++++++++++++---
> drivers/net/netdevsim/netdev.c    |   6 ++
> drivers/net/netdevsim/netdevsim.h |   1 +
> 3 files changed, 128 insertions(+), 13 deletions(-)
>
>diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>index 8d477aa99f94..6d5e4ce08dfd 100644
>--- a/drivers/net/netdevsim/dev.c
>+++ b/drivers/net/netdevsim/dev.c
>@@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
> 	.owner = THIS_MODULE,
> };
> 
>+static struct nsim_dev *nsim_dev_find_by_id(unsigned int id)
>+{
>+	struct nsim_dev *dev;
>+
>+	list_for_each_entry(dev, &nsim_dev_list, list)
>+		if (dev->nsim_bus_dev->dev.id == id)
>+			return dev;
>+
>+	return NULL;
>+}
>+
>+static struct nsim_dev_port *
>+__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>+		       unsigned int port_index)
>+{
>+	struct nsim_dev_port *nsim_dev_port;
>+
>+	port_index = nsim_dev_port_index(type, port_index);
>+	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>+		if (nsim_dev_port->port_index == port_index)
>+			return nsim_dev_port;
>+	return NULL;
>+}
>+
>+static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>+				  size_t count, loff_t *ppos)
>+{
>+	struct nsim_dev_port *nsim_dev_port;
>+	struct netdevsim *peer;
>+	unsigned int id, port;
>+	ssize_t ret = 0;
>+	char buf[23];
>+
>+	nsim_dev_port = file->private_data;
>+	rcu_read_lock();
>+	peer = rcu_dereference(nsim_dev_port->ns->peer);
>+	if (!peer) {
>+		rcu_read_unlock();
>+		return 0;
>+	}
>+
>+	id = peer->nsim_bus_dev->dev.id;
>+	port = peer->nsim_dev_port->port_index;
>+	ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>+	ret = simple_read_from_buffer(data, count, ppos, buf, ret);
>+
>+	rcu_read_unlock();
>+	return ret;
>+}
>+
>+static ssize_t nsim_dev_peer_write(struct file *file,
>+				   const char __user *data,
>+				   size_t count, loff_t *ppos)
>+{
>+	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>+	struct nsim_dev *peer_dev;
>+	unsigned int id, port;
>+	char buf[22];
>+	ssize_t ret;
>+
>+	if (count >= sizeof(buf))
>+		return -ENOSPC;
>+
>+	ret = copy_from_user(buf, data, count);
>+	if (ret)
>+		return -EFAULT;
>+	buf[count] = '\0';
>+
>+	ret = sscanf(buf, "%u %u", &id, &port);
>+	if (ret != 2) {
>+		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");
>+		return -EINVAL;
>+	}
>+
>+	ret = -EINVAL;
>+	mutex_lock(&nsim_dev_list_lock);
>+	peer_dev = nsim_dev_find_by_id(id);
>+	if (!peer_dev) {
>+		pr_err("Peer netdevsim %u does not exist\n", id);
>+		goto out_mutex;
>+	}
>+
>+	devl_lock(priv_to_devlink(peer_dev));

Why exactly do you take devlink instance mutex of the peer here?


>+	rtnl_lock();
>+	nsim_dev_port = file->private_data;
>+	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
>+					       port);
>+	if (!peer_dev_port) {
>+		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
>+		goto out_devl;
>+	}
>+
>+	if (nsim_dev_port == peer_dev_port) {
>+		pr_err("Cannot link netdevsim to itself\n");
>+		goto out_devl;
>+	}
>+
>+	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>+	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>+	ret = count;
>+
>+out_devl:
>+	rtnl_unlock();
>+	devl_unlock(priv_to_devlink(peer_dev));
>+out_mutex:
>+	mutex_unlock(&nsim_dev_list_lock);
>+
>+	return ret;
>+}
>+
>+static const struct file_operations nsim_dev_peer_fops = {
>+	.open = simple_open,
>+	.read = nsim_dev_peer_read,
>+	.write = nsim_dev_peer_write,
>+	.llseek = generic_file_llseek,
>+	.owner = THIS_MODULE,
>+};
>+
> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
> 				      struct nsim_dev_port *nsim_dev_port)
> {
>@@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
> 	}
> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
> 
>+	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>+			    nsim_dev_port, &nsim_dev_peer_fops);
>+
> 	return 0;
> }
> 
>@@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev)
> 	dev_set_drvdata(&nsim_bus_dev->dev, NULL);
> }
> 
>-static struct nsim_dev_port *
>-__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>-		       unsigned int port_index)
>-{
>-	struct nsim_dev_port *nsim_dev_port;
>-
>-	port_index = nsim_dev_port_index(type, port_index);
>-	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>-		if (nsim_dev_port->port_index == port_index)
>-			return nsim_dev_port;
>-	return NULL;
>-}
>-
> int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
> 		      unsigned int port_index)
> {
>diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>index aecaf5f44374..434322f6a565 100644
>--- a/drivers/net/netdevsim/netdev.c
>+++ b/drivers/net/netdevsim/netdev.c
>@@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
> 	ns->nsim_dev = nsim_dev;
> 	ns->nsim_dev_port = nsim_dev_port;
> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>+	RCU_INIT_POINTER(ns->peer, NULL);
> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
> 	nsim_ethtool_init(ns);
>@@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
> void nsim_destroy(struct netdevsim *ns)
> {
> 	struct net_device *dev = ns->netdev;
>+	struct netdevsim *peer;
> 
> 	rtnl_lock();
>+	peer = rtnl_dereference(ns->peer);
>+	if (peer)
>+		RCU_INIT_POINTER(peer->peer, NULL);
>+	RCU_INIT_POINTER(ns->peer, NULL);
> 	unregister_netdevice(dev);
> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
> 		nsim_macsec_teardown(ns);
>diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>index babb61d7790b..24fc3fbda791 100644
>--- a/drivers/net/netdevsim/netdevsim.h
>+++ b/drivers/net/netdevsim/netdevsim.h
>@@ -125,6 +125,7 @@ struct netdevsim {
> 	} udp_ports;
> 
> 	struct nsim_ethtool ethtool;
>+	struct netdevsim __rcu *peer;
> };
> 
> struct netdevsim *
>-- 
>2.39.3
>
David Wei Jan. 3, 2024, 9:56 p.m. UTC | #2
On 2024-01-02 03:11, Jiri Pirko wrote:
> Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote:
>> Add a debugfs file in
>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>
>> Writing "M B" to this file will link port A of netdevsim N with port B
>> of netdevsim M. Reading this file will return the linked netdevsim id
>> and port, if any.
>>
>> During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent
>> modifications to nsim_dev and peer's devlink->lock prevents concurrent
>> modifications to the peer's port_list. rtnl_lock ensures netdevices do
>> not change during the critical section where a link is established.
>>
>> The lock order is consistent with other parts that touch netdevsim and
>> should not deadlock.
>>
>> During nsim_dev_peer_read(), RCU read critical section ensures valid
>> values even if stale.
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>> drivers/net/netdevsim/dev.c       | 134 +++++++++++++++++++++++++++---
>> drivers/net/netdevsim/netdev.c    |   6 ++
>> drivers/net/netdevsim/netdevsim.h |   1 +
>> 3 files changed, 128 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>> index 8d477aa99f94..6d5e4ce08dfd 100644
>> --- a/drivers/net/netdevsim/dev.c
>> +++ b/drivers/net/netdevsim/dev.c
>> @@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>> 	.owner = THIS_MODULE,
>> };
>>
>> +static struct nsim_dev *nsim_dev_find_by_id(unsigned int id)
>> +{
>> +	struct nsim_dev *dev;
>> +
>> +	list_for_each_entry(dev, &nsim_dev_list, list)
>> +		if (dev->nsim_bus_dev->dev.id == id)
>> +			return dev;
>> +
>> +	return NULL;
>> +}
>> +
>> +static struct nsim_dev_port *
>> +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>> +		       unsigned int port_index)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port;
>> +
>> +	port_index = nsim_dev_port_index(type, port_index);
>> +	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>> +		if (nsim_dev_port->port_index == port_index)
>> +			return nsim_dev_port;
>> +	return NULL;
>> +}
>> +
>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>> +				  size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port;
>> +	struct netdevsim *peer;
>> +	unsigned int id, port;
>> +	ssize_t ret = 0;
>> +	char buf[23];
>> +
>> +	nsim_dev_port = file->private_data;
>> +	rcu_read_lock();
>> +	peer = rcu_dereference(nsim_dev_port->ns->peer);
>> +	if (!peer) {
>> +		rcu_read_unlock();
>> +		return 0;
>> +	}
>> +
>> +	id = peer->nsim_bus_dev->dev.id;
>> +	port = peer->nsim_dev_port->port_index;
>> +	ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>> +	ret = simple_read_from_buffer(data, count, ppos, buf, ret);
>> +
>> +	rcu_read_unlock();
>> +	return ret;
>> +}
>> +
>> +static ssize_t nsim_dev_peer_write(struct file *file,
>> +				   const char __user *data,
>> +				   size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>> +	struct nsim_dev *peer_dev;
>> +	unsigned int id, port;
>> +	char buf[22];
>> +	ssize_t ret;
>> +
>> +	if (count >= sizeof(buf))
>> +		return -ENOSPC;
>> +
>> +	ret = copy_from_user(buf, data, count);
>> +	if (ret)
>> +		return -EFAULT;
>> +	buf[count] = '\0';
>> +
>> +	ret = sscanf(buf, "%u %u", &id, &port);
>> +	if (ret != 2) {
>> +		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	ret = -EINVAL;
>> +	mutex_lock(&nsim_dev_list_lock);
>> +	peer_dev = nsim_dev_find_by_id(id);
>> +	if (!peer_dev) {
>> +		pr_err("Peer netdevsim %u does not exist\n", id);
>> +		goto out_mutex;
>> +	}
>> +
>> +	devl_lock(priv_to_devlink(peer_dev));
> 
> Why exactly do you take devlink instance mutex of the peer here?

To make sure that the port list does not change. Ports can be added or
removed at will via nsim_drv_port_add() and nsim_drv_port_del(), which both
take the devlink lock.

> 
> 
>> +	rtnl_lock();
>> +	nsim_dev_port = file->private_data;
>> +	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
>> +					       port);
>> +	if (!peer_dev_port) {
>> +		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
>> +		goto out_devl;
>> +	}
>> +
>> +	if (nsim_dev_port == peer_dev_port) {
>> +		pr_err("Cannot link netdevsim to itself\n");
>> +		goto out_devl;
>> +	}
>> +
>> +	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>> +	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>> +	ret = count;
>> +
>> +out_devl:
>> +	rtnl_unlock();
>> +	devl_unlock(priv_to_devlink(peer_dev));
>> +out_mutex:
>> +	mutex_unlock(&nsim_dev_list_lock);
>> +
>> +	return ret;
>> +}
>> +
>> +static const struct file_operations nsim_dev_peer_fops = {
>> +	.open = simple_open,
>> +	.read = nsim_dev_peer_read,
>> +	.write = nsim_dev_peer_write,
>> +	.llseek = generic_file_llseek,
>> +	.owner = THIS_MODULE,
>> +};
>> +
>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>> 				      struct nsim_dev_port *nsim_dev_port)
>> {
>> @@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>> 	}
>> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>
>> +	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>> +			    nsim_dev_port, &nsim_dev_peer_fops);
>> +
>> 	return 0;
>> }
>>
>> @@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev)
>> 	dev_set_drvdata(&nsim_bus_dev->dev, NULL);
>> }
>>
>> -static struct nsim_dev_port *
>> -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>> -		       unsigned int port_index)
>> -{
>> -	struct nsim_dev_port *nsim_dev_port;
>> -
>> -	port_index = nsim_dev_port_index(type, port_index);
>> -	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>> -		if (nsim_dev_port->port_index == port_index)
>> -			return nsim_dev_port;
>> -	return NULL;
>> -}
>> -
>> int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
>> 		      unsigned int port_index)
>> {
>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>> index aecaf5f44374..434322f6a565 100644
>> --- a/drivers/net/netdevsim/netdev.c
>> +++ b/drivers/net/netdevsim/netdev.c
>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>> 	ns->nsim_dev = nsim_dev;
>> 	ns->nsim_dev_port = nsim_dev_port;
>> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>> +	RCU_INIT_POINTER(ns->peer, NULL);
>> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>> 	nsim_ethtool_init(ns);
>> @@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>> void nsim_destroy(struct netdevsim *ns)
>> {
>> 	struct net_device *dev = ns->netdev;
>> +	struct netdevsim *peer;
>>
>> 	rtnl_lock();
>> +	peer = rtnl_dereference(ns->peer);
>> +	if (peer)
>> +		RCU_INIT_POINTER(peer->peer, NULL);
>> +	RCU_INIT_POINTER(ns->peer, NULL);
>> 	unregister_netdevice(dev);
>> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>> 		nsim_macsec_teardown(ns);
>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>> index babb61d7790b..24fc3fbda791 100644
>> --- a/drivers/net/netdevsim/netdevsim.h
>> +++ b/drivers/net/netdevsim/netdevsim.h
>> @@ -125,6 +125,7 @@ struct netdevsim {
>> 	} udp_ports;
>>
>> 	struct nsim_ethtool ethtool;
>> +	struct netdevsim __rcu *peer;
>> };
>>
>> struct netdevsim *
>> -- 
>> 2.39.3
>>
Jakub Kicinski Jan. 4, 2024, 1:39 a.m. UTC | #3
On Wed, 27 Dec 2023 17:46:30 -0800 David Wei wrote:
> +static ssize_t nsim_dev_peer_write(struct file *file,
> +				   const char __user *data,
> +				   size_t count, loff_t *ppos)
> +{
> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
> +	struct nsim_dev *peer_dev;
> +	unsigned int id, port;
> +	char buf[22];
> +	ssize_t ret;
> +
> +	if (count >= sizeof(buf))
> +		return -ENOSPC;
> +
> +	ret = copy_from_user(buf, data, count);
> +	if (ret)
> +		return -EFAULT;
> +	buf[count] = '\0';
> +
> +	ret = sscanf(buf, "%u %u", &id, &port);
> +	if (ret != 2) {
> +		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");

netif_err() or dev_err() ? Granted the rest of the file seems to use
pr_err(), but I'm not sure why...

> +		return -EINVAL;
> +	}

Could you put a sleep() here and test removing the device while some
thread is stuck here? I don't recall exactly but I thought debugfs
remove waits for concurrent reads and writes which could be problematic
given we take all the locks under the sun here..

> +	ret = -EINVAL;
> +	mutex_lock(&nsim_dev_list_lock);
> +	peer_dev = nsim_dev_find_by_id(id);
> +	if (!peer_dev) {
> +		pr_err("Peer netdevsim %u does not exist\n", id);
> +		goto out_mutex;
> +	}
> +
> +	devl_lock(priv_to_devlink(peer_dev));
> +	rtnl_lock();
> +	nsim_dev_port = file->private_data;
> +	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
> +					       port);
> +	if (!peer_dev_port) {
> +		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
> +		goto out_devl;
> +	}
> +
> +	if (nsim_dev_port == peer_dev_port) {
> +		pr_err("Cannot link netdevsim to itself\n");
> +		goto out_devl;
> +	}
> +
> +	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
> +	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
> +	ret = count;
> +
> +out_devl:

out_unlock_rtnl

> +	rtnl_unlock();
> +	devl_unlock(priv_to_devlink(peer_dev));
> +out_mutex:

out_unlock_dev_list

> +	mutex_unlock(&nsim_dev_list_lock);
> +
> +	return ret;
> +}
> +
> +static const struct file_operations nsim_dev_peer_fops = {
> +	.open = simple_open,
> +	.read = nsim_dev_peer_read,
> +	.write = nsim_dev_peer_write,
> +	.llseek = generic_file_llseek,

You don't support seek, you want some form of no_seek here.

> +	.owner = THIS_MODULE,
> +};
Jiri Pirko Jan. 4, 2024, 9:30 a.m. UTC | #4
Wed, Jan 03, 2024 at 10:56:36PM CET, dw@davidwei.uk wrote:
>On 2024-01-02 03:11, Jiri Pirko wrote:
>> Thu, Dec 28, 2023 at 02:46:30AM CET, dw@davidwei.uk wrote:
>>> Add a debugfs file in
>>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>>
>>> Writing "M B" to this file will link port A of netdevsim N with port B
>>> of netdevsim M. Reading this file will return the linked netdevsim id
>>> and port, if any.
>>>
>>> During nsim_dev_peer_write(), nsim_dev_list_lock prevents concurrent
>>> modifications to nsim_dev and peer's devlink->lock prevents concurrent
>>> modifications to the peer's port_list. rtnl_lock ensures netdevices do
>>> not change during the critical section where a link is established.
>>>
>>> The lock order is consistent with other parts that touch netdevsim and
>>> should not deadlock.
>>>
>>> During nsim_dev_peer_read(), RCU read critical section ensures valid
>>> values even if stale.
>>>
>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>> ---
>>> drivers/net/netdevsim/dev.c       | 134 +++++++++++++++++++++++++++---
>>> drivers/net/netdevsim/netdev.c    |   6 ++
>>> drivers/net/netdevsim/netdevsim.h |   1 +
>>> 3 files changed, 128 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>>> index 8d477aa99f94..6d5e4ce08dfd 100644
>>> --- a/drivers/net/netdevsim/dev.c
>>> +++ b/drivers/net/netdevsim/dev.c
>>> @@ -391,6 +391,124 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>>> 	.owner = THIS_MODULE,
>>> };
>>>
>>> +static struct nsim_dev *nsim_dev_find_by_id(unsigned int id)
>>> +{
>>> +	struct nsim_dev *dev;
>>> +
>>> +	list_for_each_entry(dev, &nsim_dev_list, list)
>>> +		if (dev->nsim_bus_dev->dev.id == id)
>>> +			return dev;
>>> +
>>> +	return NULL;
>>> +}
>>> +
>>> +static struct nsim_dev_port *
>>> +__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>>> +		       unsigned int port_index)
>>> +{
>>> +	struct nsim_dev_port *nsim_dev_port;
>>> +
>>> +	port_index = nsim_dev_port_index(type, port_index);
>>> +	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>>> +		if (nsim_dev_port->port_index == port_index)
>>> +			return nsim_dev_port;
>>> +	return NULL;
>>> +}
>>> +
>>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>>> +				  size_t count, loff_t *ppos)
>>> +{
>>> +	struct nsim_dev_port *nsim_dev_port;
>>> +	struct netdevsim *peer;
>>> +	unsigned int id, port;
>>> +	ssize_t ret = 0;
>>> +	char buf[23];
>>> +
>>> +	nsim_dev_port = file->private_data;
>>> +	rcu_read_lock();
>>> +	peer = rcu_dereference(nsim_dev_port->ns->peer);
>>> +	if (!peer) {
>>> +		rcu_read_unlock();
>>> +		return 0;
>>> +	}
>>> +
>>> +	id = peer->nsim_bus_dev->dev.id;
>>> +	port = peer->nsim_dev_port->port_index;
>>> +	ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>>> +	ret = simple_read_from_buffer(data, count, ppos, buf, ret);
>>> +
>>> +	rcu_read_unlock();
>>> +	return ret;
>>> +}
>>> +
>>> +static ssize_t nsim_dev_peer_write(struct file *file,
>>> +				   const char __user *data,
>>> +				   size_t count, loff_t *ppos)
>>> +{
>>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>>> +	struct nsim_dev *peer_dev;
>>> +	unsigned int id, port;
>>> +	char buf[22];
>>> +	ssize_t ret;
>>> +
>>> +	if (count >= sizeof(buf))
>>> +		return -ENOSPC;
>>> +
>>> +	ret = copy_from_user(buf, data, count);
>>> +	if (ret)
>>> +		return -EFAULT;
>>> +	buf[count] = '\0';
>>> +
>>> +	ret = sscanf(buf, "%u %u", &id, &port);
>>> +	if (ret != 2) {
>>> +		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	ret = -EINVAL;
>>> +	mutex_lock(&nsim_dev_list_lock);
>>> +	peer_dev = nsim_dev_find_by_id(id);
>>> +	if (!peer_dev) {
>>> +		pr_err("Peer netdevsim %u does not exist\n", id);
>>> +		goto out_mutex;
>>> +	}
>>> +
>>> +	devl_lock(priv_to_devlink(peer_dev));
>> 
>> Why exactly do you take devlink instance mutex of the peer here?
>
>To make sure that the port list does not change. Ports can be added or
>removed at will via nsim_drv_port_add() and nsim_drv_port_del(), which both
>take the devlink lock.

Ok.

>
>> 
>> 
>>> +	rtnl_lock();
>>> +	nsim_dev_port = file->private_data;
>>> +	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
>>> +					       port);
>>> +	if (!peer_dev_port) {
>>> +		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
>>> +		goto out_devl;
>>> +	}
>>> +
>>> +	if (nsim_dev_port == peer_dev_port) {
>>> +		pr_err("Cannot link netdevsim to itself\n");
>>> +		goto out_devl;
>>> +	}
>>> +
>>> +	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>>> +	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>>> +	ret = count;
>>> +
>>> +out_devl:
>>> +	rtnl_unlock();
>>> +	devl_unlock(priv_to_devlink(peer_dev));
>>> +out_mutex:
>>> +	mutex_unlock(&nsim_dev_list_lock);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static const struct file_operations nsim_dev_peer_fops = {
>>> +	.open = simple_open,
>>> +	.read = nsim_dev_peer_read,
>>> +	.write = nsim_dev_peer_write,
>>> +	.llseek = generic_file_llseek,
>>> +	.owner = THIS_MODULE,
>>> +};
>>> +
>>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>> 				      struct nsim_dev_port *nsim_dev_port)
>>> {
>>> @@ -421,6 +539,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>> 	}
>>> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>>
>>> +	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>>> +			    nsim_dev_port, &nsim_dev_peer_fops);
>>> +
>>> 	return 0;
>>> }
>>>
>>> @@ -1704,19 +1825,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev)
>>> 	dev_set_drvdata(&nsim_bus_dev->dev, NULL);
>>> }
>>>
>>> -static struct nsim_dev_port *
>>> -__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
>>> -		       unsigned int port_index)
>>> -{
>>> -	struct nsim_dev_port *nsim_dev_port;
>>> -
>>> -	port_index = nsim_dev_port_index(type, port_index);
>>> -	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
>>> -		if (nsim_dev_port->port_index == port_index)
>>> -			return nsim_dev_port;
>>> -	return NULL;
>>> -}
>>> -
>>> int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
>>> 		      unsigned int port_index)
>>> {
>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>> index aecaf5f44374..434322f6a565 100644
>>> --- a/drivers/net/netdevsim/netdev.c
>>> +++ b/drivers/net/netdevsim/netdev.c
>>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>> 	ns->nsim_dev = nsim_dev;
>>> 	ns->nsim_dev_port = nsim_dev_port;
>>> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>>> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>>> 	nsim_ethtool_init(ns);
>>> @@ -407,8 +408,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>> void nsim_destroy(struct netdevsim *ns)
>>> {
>>> 	struct net_device *dev = ns->netdev;
>>> +	struct netdevsim *peer;
>>>
>>> 	rtnl_lock();
>>> +	peer = rtnl_dereference(ns->peer);
>>> +	if (peer)
>>> +		RCU_INIT_POINTER(peer->peer, NULL);
>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>> 	unregister_netdevice(dev);
>>> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>>> 		nsim_macsec_teardown(ns);
>>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>>> index babb61d7790b..24fc3fbda791 100644
>>> --- a/drivers/net/netdevsim/netdevsim.h
>>> +++ b/drivers/net/netdevsim/netdevsim.h
>>> @@ -125,6 +125,7 @@ struct netdevsim {
>>> 	} udp_ports;
>>>
>>> 	struct nsim_ethtool ethtool;
>>> +	struct netdevsim __rcu *peer;
>>> };
>>>
>>> struct netdevsim *
>>> -- 
>>> 2.39.3
>>>
David Wei Jan. 9, 2024, 4:57 p.m. UTC | #5
On 2024-01-03 17:39, Jakub Kicinski wrote:
> On Wed, 27 Dec 2023 17:46:30 -0800 David Wei wrote:
>> +static ssize_t nsim_dev_peer_write(struct file *file,
>> +				   const char __user *data,
>> +				   size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>> +	struct nsim_dev *peer_dev;
>> +	unsigned int id, port;
>> +	char buf[22];
>> +	ssize_t ret;
>> +
>> +	if (count >= sizeof(buf))
>> +		return -ENOSPC;
>> +
>> +	ret = copy_from_user(buf, data, count);
>> +	if (ret)
>> +		return -EFAULT;
>> +	buf[count] = '\0';
>> +
>> +	ret = sscanf(buf, "%u %u", &id, &port);
>> +	if (ret != 2) {
>> +		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");
> 
> netif_err() or dev_err() ? Granted the rest of the file seems to use
> pr_err(), but I'm not sure why...

I can change it to use one of these two in this patchset, then I can
change the others separately in another patch. How does that sound?

> 
>> +		return -EINVAL;
>> +	}
> 
> Could you put a sleep() here and test removing the device while some
> thread is stuck here? I don't recall exactly but I thought debugfs
> remove waits for concurrent reads and writes which could be problematic
> given we take all the locks under the sun here..

Yep, I'll test this.

> 
>> +	ret = -EINVAL;
>> +	mutex_lock(&nsim_dev_list_lock);
>> +	peer_dev = nsim_dev_find_by_id(id);
>> +	if (!peer_dev) {
>> +		pr_err("Peer netdevsim %u does not exist\n", id);
>> +		goto out_mutex;
>> +	}
>> +
>> +	devl_lock(priv_to_devlink(peer_dev));
>> +	rtnl_lock();
>> +	nsim_dev_port = file->private_data;
>> +	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
>> +					       port);
>> +	if (!peer_dev_port) {
>> +		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
>> +		goto out_devl;
>> +	}
>> +
>> +	if (nsim_dev_port == peer_dev_port) {
>> +		pr_err("Cannot link netdevsim to itself\n");
>> +		goto out_devl;
>> +	}
>> +
>> +	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>> +	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>> +	ret = count;
>> +
>> +out_devl:
> 
> out_unlock_rtnl
> 
>> +	rtnl_unlock();
>> +	devl_unlock(priv_to_devlink(peer_dev));
>> +out_mutex:
> 
> out_unlock_dev_list
> 
>> +	mutex_unlock(&nsim_dev_list_lock);
>> +
>> +	return ret;
>> +}
>> +
>> +static const struct file_operations nsim_dev_peer_fops = {
>> +	.open = simple_open,
>> +	.read = nsim_dev_peer_read,
>> +	.write = nsim_dev_peer_write,
>> +	.llseek = generic_file_llseek,
> 
> You don't support seek, you want some form of no_seek here.
> 
>> +	.owner = THIS_MODULE,
>> +};
Jakub Kicinski Jan. 10, 2024, 1:53 a.m. UTC | #6
On Tue, 9 Jan 2024 08:57:59 -0800 David Wei wrote:
> >> +	ret = sscanf(buf, "%u %u", &id, &port);
> >> +	if (ret != 2) {
> >> +		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");  
> > 
> > netif_err() or dev_err() ? Granted the rest of the file seems to use
> > pr_err(), but I'm not sure why...  
> 
> I can change it to use one of these two in this patchset, then I can
> change the others separately in another patch. How does that sound?

Separate patch and separate series. Let's not load more unrelated
patches into this series :)
diff mbox series

Patch

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 8d477aa99f94..6d5e4ce08dfd 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -391,6 +391,124 @@  static const struct file_operations nsim_dev_rate_parent_fops = {
 	.owner = THIS_MODULE,
 };
 
+static struct nsim_dev *nsim_dev_find_by_id(unsigned int id)
+{
+	struct nsim_dev *dev;
+
+	list_for_each_entry(dev, &nsim_dev_list, list)
+		if (dev->nsim_bus_dev->dev.id == id)
+			return dev;
+
+	return NULL;
+}
+
+static struct nsim_dev_port *
+__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
+		       unsigned int port_index)
+{
+	struct nsim_dev_port *nsim_dev_port;
+
+	port_index = nsim_dev_port_index(type, port_index);
+	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
+		if (nsim_dev_port->port_index == port_index)
+			return nsim_dev_port;
+	return NULL;
+}
+
+static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
+				  size_t count, loff_t *ppos)
+{
+	struct nsim_dev_port *nsim_dev_port;
+	struct netdevsim *peer;
+	unsigned int id, port;
+	ssize_t ret = 0;
+	char buf[23];
+
+	nsim_dev_port = file->private_data;
+	rcu_read_lock();
+	peer = rcu_dereference(nsim_dev_port->ns->peer);
+	if (!peer) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	id = peer->nsim_bus_dev->dev.id;
+	port = peer->nsim_dev_port->port_index;
+	ret = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
+	ret = simple_read_from_buffer(data, count, ppos, buf, ret);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static ssize_t nsim_dev_peer_write(struct file *file,
+				   const char __user *data,
+				   size_t count, loff_t *ppos)
+{
+	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
+	struct nsim_dev *peer_dev;
+	unsigned int id, port;
+	char buf[22];
+	ssize_t ret;
+
+	if (count >= sizeof(buf))
+		return -ENOSPC;
+
+	ret = copy_from_user(buf, data, count);
+	if (ret)
+		return -EFAULT;
+	buf[count] = '\0';
+
+	ret = sscanf(buf, "%u %u", &id, &port);
+	if (ret != 2) {
+		pr_err("Format is peer netdevsim \"id port\" (uint uint)\n");
+		return -EINVAL;
+	}
+
+	ret = -EINVAL;
+	mutex_lock(&nsim_dev_list_lock);
+	peer_dev = nsim_dev_find_by_id(id);
+	if (!peer_dev) {
+		pr_err("Peer netdevsim %u does not exist\n", id);
+		goto out_mutex;
+	}
+
+	devl_lock(priv_to_devlink(peer_dev));
+	rtnl_lock();
+	nsim_dev_port = file->private_data;
+	peer_dev_port = __nsim_dev_port_lookup(peer_dev, NSIM_DEV_PORT_TYPE_PF,
+					       port);
+	if (!peer_dev_port) {
+		pr_err("Peer netdevsim %u port %u does not exist\n", id, port);
+		goto out_devl;
+	}
+
+	if (nsim_dev_port == peer_dev_port) {
+		pr_err("Cannot link netdevsim to itself\n");
+		goto out_devl;
+	}
+
+	rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
+	rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
+	ret = count;
+
+out_devl:
+	rtnl_unlock();
+	devl_unlock(priv_to_devlink(peer_dev));
+out_mutex:
+	mutex_unlock(&nsim_dev_list_lock);
+
+	return ret;
+}
+
+static const struct file_operations nsim_dev_peer_fops = {
+	.open = simple_open,
+	.read = nsim_dev_peer_read,
+	.write = nsim_dev_peer_write,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
 static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 				      struct nsim_dev_port *nsim_dev_port)
 {
@@ -421,6 +539,9 @@  static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 	}
 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
 
+	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
+			    nsim_dev_port, &nsim_dev_peer_fops);
+
 	return 0;
 }
 
@@ -1704,19 +1825,6 @@  void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev)
 	dev_set_drvdata(&nsim_bus_dev->dev, NULL);
 }
 
-static struct nsim_dev_port *
-__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
-		       unsigned int port_index)
-{
-	struct nsim_dev_port *nsim_dev_port;
-
-	port_index = nsim_dev_port_index(type, port_index);
-	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
-		if (nsim_dev_port->port_index == port_index)
-			return nsim_dev_port;
-	return NULL;
-}
-
 int nsim_drv_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
 		      unsigned int port_index)
 {
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index aecaf5f44374..434322f6a565 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -388,6 +388,7 @@  nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 	ns->nsim_dev = nsim_dev;
 	ns->nsim_dev_port = nsim_dev_port;
 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
+	RCU_INIT_POINTER(ns->peer, NULL);
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
 	nsim_ethtool_init(ns);
@@ -407,8 +408,13 @@  nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 void nsim_destroy(struct netdevsim *ns)
 {
 	struct net_device *dev = ns->netdev;
+	struct netdevsim *peer;
 
 	rtnl_lock();
+	peer = rtnl_dereference(ns->peer);
+	if (peer)
+		RCU_INIT_POINTER(peer->peer, NULL);
+	RCU_INIT_POINTER(ns->peer, NULL);
 	unregister_netdevice(dev);
 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
 		nsim_macsec_teardown(ns);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index babb61d7790b..24fc3fbda791 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -125,6 +125,7 @@  struct netdevsim {
 	} udp_ports;
 
 	struct nsim_ethtool ethtool;
+	struct netdevsim __rcu *peer;
 };
 
 struct netdevsim *