diff mbox series

[PATCHv3,net-next] bonding: add software tx timestamping support

Message ID 20230410082351.1176466-1-liuhangbin@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [PATCHv3,net-next] bonding: add software tx timestamping support | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2613 this patch: 2613
netdev/cc_maintainers warning 2 maintainers not CCed: willemdebruijn.kernel@gmail.com andy@greyhouse.net
netdev/build_clang success Errors and warnings before: 539 this patch: 539
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2755 this patch: 2755
netdev/checkpatch warning WARNING: line length of 92 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Hangbin Liu April 10, 2023, 8:23 a.m. UTC
Currently, bonding only obtain the timestamp (ts) information of
the active slave, which is available only for modes 1, 5, and 6.
For other modes, bonding only has software rx timestamping support.

However, some users who use modes such as LACP also want tx timestamp
support. To address this issue, let's check the ts information of each
slave. If all slaves support tx timestamping, we can enable tx
timestamping support for the bond.

Suggested-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
v3: remove dev_hold/dev_put. remove the '\' for line continuation.
v2: check each slave's ts info to make sure bond support sw tx
    timestamping
---
 drivers/net/bonding/bond_main.c | 36 +++++++++++++++++++++++++++++++--
 include/uapi/linux/net_tstamp.h |  3 +++
 2 files changed, 37 insertions(+), 2 deletions(-)

Comments

Jay Vosburgh April 12, 2023, 12:19 a.m. UTC | #1
Hangbin Liu <liuhangbin@gmail.com> wrote:

>Currently, bonding only obtain the timestamp (ts) information of
>the active slave, which is available only for modes 1, 5, and 6.
>For other modes, bonding only has software rx timestamping support.
>
>However, some users who use modes such as LACP also want tx timestamp
>support. To address this issue, let's check the ts information of each
>slave. If all slaves support tx timestamping, we can enable tx
>timestamping support for the bond.
>
>Suggested-by: Miroslav Lichvar <mlichvar@redhat.com>
>Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>

>---
>v3: remove dev_hold/dev_put. remove the '\' for line continuation.
>v2: check each slave's ts info to make sure bond support sw tx
>    timestamping
>---
> drivers/net/bonding/bond_main.c | 36 +++++++++++++++++++++++++++++++--
> include/uapi/linux/net_tstamp.h |  3 +++
> 2 files changed, 37 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 00646aa315c3..3b643739bbe7 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -5686,9 +5686,13 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
> 				    struct ethtool_ts_info *info)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>+	struct ethtool_ts_info ts_info;
> 	const struct ethtool_ops *ops;
> 	struct net_device *real_dev;
> 	struct phy_device *phydev;
>+	bool soft_support = false;
>+	struct list_head *iter;
>+	struct slave *slave;
> 	int ret = 0;
> 
> 	rcu_read_lock();
>@@ -5707,10 +5711,38 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
> 			ret = ops->get_ts_info(real_dev, info);
> 			goto out;
> 		}
>+	} else {
>+		/* Check if all slaves support software rx/tx timestamping */
>+		rcu_read_lock();
>+		bond_for_each_slave_rcu(bond, slave, iter) {
>+			ret = -1;
>+			ops = slave->dev->ethtool_ops;
>+			phydev = slave->dev->phydev;
>+
>+			if (phy_has_tsinfo(phydev))
>+				ret = phy_ts_info(phydev, &ts_info);
>+			else if (ops->get_ts_info)
>+				ret = ops->get_ts_info(slave->dev, &ts_info);
>+
>+			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_SOFTRXTX) ==
>+				    SOF_TIMESTAMPING_SOFTRXTX) {
>+				soft_support = true;
>+				continue;
>+			}
>+
>+			soft_support = false;
>+			break;
>+		}
>+		rcu_read_unlock();
> 	}
> 
>-	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
>-				SOF_TIMESTAMPING_SOFTWARE;
>+	ret = 0;
>+	if (soft_support) {
>+		info->so_timestamping = SOF_TIMESTAMPING_SOFTRXTX;
>+	} else {
>+		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
>+					SOF_TIMESTAMPING_SOFTWARE;
>+	}
> 	info->phc_index = -1;
> 
> out:
>diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
>index a2c66b3d7f0f..2adaa0008434 100644
>--- a/include/uapi/linux/net_tstamp.h
>+++ b/include/uapi/linux/net_tstamp.h
>@@ -48,6 +48,9 @@ enum {
> 					 SOF_TIMESTAMPING_TX_SCHED | \
> 					 SOF_TIMESTAMPING_TX_ACK)
> 
>+#define SOF_TIMESTAMPING_SOFTRXTX (SOF_TIMESTAMPING_TX_SOFTWARE | \
>+				   SOF_TIMESTAMPING_RX_SOFTWARE | \
>+				   SOF_TIMESTAMPING_SOFTWARE)
> /**
>  * struct so_timestamping - SO_TIMESTAMPING parameter
>  *
>-- 
>2.38.1
>
Jakub Kicinski April 12, 2023, 4:30 a.m. UTC | #2
On Mon, 10 Apr 2023 16:23:51 +0800 Hangbin Liu wrote:
> @@ -5707,10 +5711,38 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
>  			ret = ops->get_ts_info(real_dev, info);
>  			goto out;
>  		}
> +	} else {
> +		/* Check if all slaves support software rx/tx timestamping */
> +		rcu_read_lock();
> +		bond_for_each_slave_rcu(bond, slave, iter) {
> +			ret = -1;
> +			ops = slave->dev->ethtool_ops;
> +			phydev = slave->dev->phydev;
> +
> +			if (phy_has_tsinfo(phydev))
> +				ret = phy_ts_info(phydev, &ts_info);
> +			else if (ops->get_ts_info)
> +				ret = ops->get_ts_info(slave->dev, &ts_info);

Do we _really_ need to hold RCU lock over this?
Imposing atomic context restrictions on driver callbacks should not be
taken lightly. I'm 75% sure .ethtool_get_ts_info can only be called
under rtnl lock off the top of my head, is that not the case?

> +			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_SOFTRXTX) ==
> +				    SOF_TIMESTAMPING_SOFTRXTX) {

You could check in this loop if TX is supported...

> +				soft_support = true;
> +				continue;
> +			}
> +
> +			soft_support = false;
> +			break;
> +		}
> +		rcu_read_unlock();
>  	}
>  
> -	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
> -				SOF_TIMESTAMPING_SOFTWARE;
> +	ret = 0;
> +	if (soft_support) {
> +		info->so_timestamping = SOF_TIMESTAMPING_SOFTRXTX;
> +	} else {
> +		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
> +					SOF_TIMESTAMPING_SOFTWARE;

...make this unconditional and conditionally add TX...

> +	}
>  	info->phc_index = -1;
>  
>  out:
> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> index a2c66b3d7f0f..2adaa0008434 100644
> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -48,6 +48,9 @@ enum {
>  					 SOF_TIMESTAMPING_TX_SCHED | \
>  					 SOF_TIMESTAMPING_TX_ACK)
>  
> +#define SOF_TIMESTAMPING_SOFTRXTX (SOF_TIMESTAMPING_TX_SOFTWARE | \
> +				   SOF_TIMESTAMPING_RX_SOFTWARE | \
> +				   SOF_TIMESTAMPING_SOFTWARE)

..then you won't need this define in uAPI.
Jay Vosburgh April 12, 2023, 6:33 a.m. UTC | #3
Jakub Kicinski <kuba@kernel.org> wrote:

>On Mon, 10 Apr 2023 16:23:51 +0800 Hangbin Liu wrote:
>> @@ -5707,10 +5711,38 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
>>  			ret = ops->get_ts_info(real_dev, info);
>>  			goto out;
>>  		}
>> +	} else {
>> +		/* Check if all slaves support software rx/tx timestamping */
>> +		rcu_read_lock();
>> +		bond_for_each_slave_rcu(bond, slave, iter) {
>> +			ret = -1;
>> +			ops = slave->dev->ethtool_ops;
>> +			phydev = slave->dev->phydev;
>> +
>> +			if (phy_has_tsinfo(phydev))
>> +				ret = phy_ts_info(phydev, &ts_info);
>> +			else if (ops->get_ts_info)
>> +				ret = ops->get_ts_info(slave->dev, &ts_info);
>
>Do we _really_ need to hold RCU lock over this?
>Imposing atomic context restrictions on driver callbacks should not be
>taken lightly. I'm 75% sure .ethtool_get_ts_info can only be called
>under rtnl lock off the top of my head, is that not the case?

	Ok, maybe I didn't look at that carefully enough, and now that I
do, it's really complicated.

	Going through it, I think the call path that's relevant is
taprio_change -> taprio_parse_clockid -> ethtool_ops->get_ts_info.
taprio_change is Qdisc_ops.change function, and tc_modify_qdisc should
come in with RTNL held.

	If I'm reading cscope right, the other possible caller of
Qdisc_ops.change is fifo_set_limit, and it looks like that function is
only called by functions that are themselves Qdisc_ops.change functions
(red_change -> __red_change, sfb_change, tbf_change) or Qdisc_ops.init
functions (red_init -> __red_change, sfb_init, tbf_init).

	There's also a qdisc_create_dflt -> Qdisc_ops.init call path,
but I don't know if literally all calls to qdisc_create_dflt hold RTNL.

	There's a lot of them, and I'm not sure how many of those could
ever end up calling into taprio_change (if, say, a taprio qdisc is
attached within another qdisc).

	qdisc_create also calls Qdisc_ops.init, but that one seems to
clearly expect to enter with RTNL.

	Any tc expert able to state for sure whether it's possible to
get into any of the above without RTNL?  I suspect it isn't, but I'm not
100% sure either.


>> +			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_SOFTRXTX) ==
>> +				    SOF_TIMESTAMPING_SOFTRXTX) {
>
>You could check in this loop if TX is supported...

	I see your point below about not wanting to create
SOFT_TIMESTAMPING_SOFTRXTX, but doesn't the logic need to test all three
of the flags _TX_SOFTWARE, _RX_SOFTWARE, and _SOFTWARE?

	-J

>> +				soft_support = true;
>> +				continue;
>> +			}
>> +
>> +			soft_support = false;
>> +			break;
>> +		}
>> +		rcu_read_unlock();
>>  	}
>>  
>> -	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
>> -				SOF_TIMESTAMPING_SOFTWARE;
>> +	ret = 0;
>> +	if (soft_support) {
>> +		info->so_timestamping = SOF_TIMESTAMPING_SOFTRXTX;
>> +	} else {
>> +		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
>> +					SOF_TIMESTAMPING_SOFTWARE;
>
>...make this unconditional and conditionally add TX...
>
>> +	}
>>  	info->phc_index = -1;
>>  
>>  out:
>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
>> index a2c66b3d7f0f..2adaa0008434 100644
>> --- a/include/uapi/linux/net_tstamp.h
>> +++ b/include/uapi/linux/net_tstamp.h
>> @@ -48,6 +48,9 @@ enum {
>>  					 SOF_TIMESTAMPING_TX_SCHED | \
>>  					 SOF_TIMESTAMPING_TX_ACK)
>>  
>> +#define SOF_TIMESTAMPING_SOFTRXTX (SOF_TIMESTAMPING_TX_SOFTWARE | \
>> +				   SOF_TIMESTAMPING_RX_SOFTWARE | \
>> +				   SOF_TIMESTAMPING_SOFTWARE)
>
>..then you won't need this define in uAPI.

---
	-Jay Vosburgh, jay.vosburgh@canonical.com
Hangbin Liu April 12, 2023, 12:28 p.m. UTC | #4
On Tue, Apr 11, 2023 at 11:33:23PM -0700, Jay Vosburgh wrote:
> Jakub Kicinski <kuba@kernel.org> wrote:
> 
> >On Mon, 10 Apr 2023 16:23:51 +0800 Hangbin Liu wrote:
> >> @@ -5707,10 +5711,38 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
> >>  			ret = ops->get_ts_info(real_dev, info);
> >>  			goto out;
> >>  		}
> >> +	} else {
> >> +		/* Check if all slaves support software rx/tx timestamping */
> >> +		rcu_read_lock();
> >> +		bond_for_each_slave_rcu(bond, slave, iter) {
> >> +			ret = -1;
> >> +			ops = slave->dev->ethtool_ops;
> >> +			phydev = slave->dev->phydev;
> >> +
> >> +			if (phy_has_tsinfo(phydev))
> >> +				ret = phy_ts_info(phydev, &ts_info);
> >> +			else if (ops->get_ts_info)
> >> +				ret = ops->get_ts_info(slave->dev, &ts_info);
> >
> >Do we _really_ need to hold RCU lock over this?
> >Imposing atomic context restrictions on driver callbacks should not be
> >taken lightly. I'm 75% sure .ethtool_get_ts_info can only be called
> >under rtnl lock off the top of my head, is that not the case?
> 
> 	Ok, maybe I didn't look at that carefully enough, and now that I
> do, it's really complicated.
> 
> 	Going through it, I think the call path that's relevant is
> taprio_change -> taprio_parse_clockid -> ethtool_ops->get_ts_info.
> taprio_change is Qdisc_ops.change function, and tc_modify_qdisc should
> come in with RTNL held.
> 
> 	If I'm reading cscope right, the other possible caller of
> Qdisc_ops.change is fifo_set_limit, and it looks like that function is
> only called by functions that are themselves Qdisc_ops.change functions
> (red_change -> __red_change, sfb_change, tbf_change) or Qdisc_ops.init
> functions (red_init -> __red_change, sfb_init, tbf_init).
> 
> 	There's also a qdisc_create_dflt -> Qdisc_ops.init call path,
> but I don't know if literally all calls to qdisc_create_dflt hold RTNL.
> 
> 	There's a lot of them, and I'm not sure how many of those could
> ever end up calling into taprio_change (if, say, a taprio qdisc is
> attached within another qdisc).
> 
> 	qdisc_create also calls Qdisc_ops.init, but that one seems to
> clearly expect to enter with RTNL.
> 
> 	Any tc expert able to state for sure whether it's possible to
> get into any of the above without RTNL?  I suspect it isn't, but I'm not
> 100% sure either.

You dug more than me. Maybe we can add an ASSERT_RTNL() checking here first?
But since we can't 100% sure we are holding the rtnl lock, I think we
can keep the rcu lock for safe. I saw rlb_next_rx_slave() also did the same...

> 
> 
> >> +			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_SOFTRXTX) ==
> >> +				    SOF_TIMESTAMPING_SOFTRXTX) {
> >
> >You could check in this loop if TX is supported...
> 
> 	I see your point below about not wanting to create
> SOFT_TIMESTAMPING_SOFTRXTX, but doesn't the logic need to test all three
> of the flags _TX_SOFTWARE, _RX_SOFTWARE, and _SOFTWARE?

I think Jakub means we have already add _RX_SOFTWARE and _SOFTWARE for bonding
whatever slave's flag, then we just need to check slave's _TX_SOFTWARE flag.

Thanks
Hangbin
Jakub Kicinski April 12, 2023, 2:25 p.m. UTC | #5
On Wed, 12 Apr 2023 20:28:08 +0800 Hangbin Liu wrote:
> > 	Ok, maybe I didn't look at that carefully enough, and now that I
> > do, it's really complicated.
> > 
> > 	Going through it, I think the call path that's relevant is
> > taprio_change -> taprio_parse_clockid -> ethtool_ops->get_ts_info.
> > taprio_change is Qdisc_ops.change function, and tc_modify_qdisc should
> > come in with RTNL held.
> > 
> > 	If I'm reading cscope right, the other possible caller of
> > Qdisc_ops.change is fifo_set_limit, and it looks like that function is
> > only called by functions that are themselves Qdisc_ops.change functions
> > (red_change -> __red_change, sfb_change, tbf_change) or Qdisc_ops.init
> > functions (red_init -> __red_change, sfb_init, tbf_init).
> > 
> > 	There's also a qdisc_create_dflt -> Qdisc_ops.init call path,
> > but I don't know if literally all calls to qdisc_create_dflt hold RTNL.
> > 
> > 	There's a lot of them, and I'm not sure how many of those could
> > ever end up calling into taprio_change (if, say, a taprio qdisc is
> > attached within another qdisc).
> > 
> > 	qdisc_create also calls Qdisc_ops.init, but that one seems to
> > clearly expect to enter with RTNL.
> > 
> > 	Any tc expert able to state for sure whether it's possible to
> > get into any of the above without RTNL?  I suspect it isn't, but I'm not
> > 100% sure either.  
> 
> You dug more than me. Maybe we can add an ASSERT_RTNL() checking here first?
> But since we can't 100% sure we are holding the rtnl lock, I think we
> can keep the rcu lock for safe. I saw rlb_next_rx_slave() also did the same...

ASSERT_RTNL sounds good. I think that drivers may expect rtnl lock to 
be held around ethtool ops, so if some path is not holding it - I'd
count that as a bug.

> > >You could check in this loop if TX is supported...  
> > 
> > 	I see your point below about not wanting to create
> > SOFT_TIMESTAMPING_SOFTRXTX, but doesn't the logic need to test all three
> > of the flags _TX_SOFTWARE, _RX_SOFTWARE, and _SOFTWARE?  
> 
> I think Jakub means we have already add _RX_SOFTWARE and _SOFTWARE for bonding
> whatever slave's flag, then we just need to check slave's _TX_SOFTWARE flag.

Indeed.
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 00646aa315c3..3b643739bbe7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5686,9 +5686,13 @@  static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
 				    struct ethtool_ts_info *info)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct ethtool_ts_info ts_info;
 	const struct ethtool_ops *ops;
 	struct net_device *real_dev;
 	struct phy_device *phydev;
+	bool soft_support = false;
+	struct list_head *iter;
+	struct slave *slave;
 	int ret = 0;
 
 	rcu_read_lock();
@@ -5707,10 +5711,38 @@  static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
 			ret = ops->get_ts_info(real_dev, info);
 			goto out;
 		}
+	} else {
+		/* Check if all slaves support software rx/tx timestamping */
+		rcu_read_lock();
+		bond_for_each_slave_rcu(bond, slave, iter) {
+			ret = -1;
+			ops = slave->dev->ethtool_ops;
+			phydev = slave->dev->phydev;
+
+			if (phy_has_tsinfo(phydev))
+				ret = phy_ts_info(phydev, &ts_info);
+			else if (ops->get_ts_info)
+				ret = ops->get_ts_info(slave->dev, &ts_info);
+
+			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_SOFTRXTX) ==
+				    SOF_TIMESTAMPING_SOFTRXTX) {
+				soft_support = true;
+				continue;
+			}
+
+			soft_support = false;
+			break;
+		}
+		rcu_read_unlock();
 	}
 
-	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
-				SOF_TIMESTAMPING_SOFTWARE;
+	ret = 0;
+	if (soft_support) {
+		info->so_timestamping = SOF_TIMESTAMPING_SOFTRXTX;
+	} else {
+		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+					SOF_TIMESTAMPING_SOFTWARE;
+	}
 	info->phc_index = -1;
 
 out:
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index a2c66b3d7f0f..2adaa0008434 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -48,6 +48,9 @@  enum {
 					 SOF_TIMESTAMPING_TX_SCHED | \
 					 SOF_TIMESTAMPING_TX_ACK)
 
+#define SOF_TIMESTAMPING_SOFTRXTX (SOF_TIMESTAMPING_TX_SOFTWARE | \
+				   SOF_TIMESTAMPING_RX_SOFTWARE | \
+				   SOF_TIMESTAMPING_SOFTWARE)
 /**
  * struct so_timestamping - SO_TIMESTAMPING parameter
  *