Message ID | 20211109095013.27829-7-martin.kaistra@linutronix.de (mailing list archive) |
---|---|
State | Deferred |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add PTP support for BCM53128 switch | expand |
On Tue, Nov 09, 2021 at 10:50:08AM +0100, Martin Kaistra wrote: > In order to get the switch to generate a timestamp for a transmitted > packet, we need to set the TS bit in the BRCM tag. The switch will then > create a status frame, which gets send back to the cpu. > In b53_port_txtstamp() we put the skb into a waiting position. > > When a status frame is received, we extract the timestamp and put the time > according to our timecounter into the waiting skb. When > TX_TSTAMP_TIMEOUT is reached and we have no means to correctly get back > a full timestamp, we cancel the process. > > As the status frame doesn't contain a reference to the original packet, > only one packet with timestamp request can be sent at a time. > > Signed-off-by: Martin Kaistra <martin.kaistra@linutronix.de> > --- > drivers/net/dsa/b53/b53_common.c | 1 + > drivers/net/dsa/b53/b53_ptp.c | 56 ++++++++++++++++++++++++++++++++ > drivers/net/dsa/b53/b53_ptp.h | 8 +++++ > net/dsa/tag_brcm.c | 46 ++++++++++++++++++++++++++ > 4 files changed, 111 insertions(+) > > diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c > index a9408f9cd414..56a9de89b38b 100644 > --- a/drivers/net/dsa/b53/b53_common.c > +++ b/drivers/net/dsa/b53/b53_common.c > @@ -2301,6 +2301,7 @@ static const struct dsa_switch_ops b53_switch_ops = { > .port_change_mtu = b53_change_mtu, > .get_ts_info = b53_get_ts_info, > .port_rxtstamp = b53_port_rxtstamp, > + .port_txtstamp = b53_port_txtstamp, > }; > > struct b53_chip_data { > diff --git a/drivers/net/dsa/b53/b53_ptp.c b/drivers/net/dsa/b53/b53_ptp.c > index f8dd8d484d93..5567135ba8b9 100644 > --- a/drivers/net/dsa/b53/b53_ptp.c > +++ b/drivers/net/dsa/b53/b53_ptp.c > @@ -100,14 +100,65 @@ static long b53_hwtstamp_work(struct ptp_clock_info *ptp) > { > struct b53_device *dev = > container_of(ptp, struct b53_device, ptp_clock_info); > + struct dsa_switch *ds = dev->ds; > + int i; > > mutex_lock(&dev->ptp_mutex); > timecounter_read(&dev->tc); > mutex_unlock(&dev->ptp_mutex); > > + for (i = 0; i < ds->num_ports; i++) { > + struct b53_port_hwtstamp *ps; > + > + if (!dsa_is_user_port(ds, i)) > + continue; dsa_switch_for_each_user_port and replace i with dp->index. This makes for a more efficient iteration through the port list. > + > + ps = &dev->ports[i].port_hwtstamp; > + > + if (test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state) && > + time_is_before_jiffies(ps->tx_tstamp_start + > + TX_TSTAMP_TIMEOUT)) { > + dev_err(dev->dev, > + "Timeout while waiting for Tx timestamp!\n"); > + dev_kfree_skb_any(ps->tx_skb); > + ps->tx_skb = NULL; > + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, > + &ps->state); > + } > + } > + > return B53_PTP_OVERFLOW_PERIOD; > } > > +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) > +{ > + struct b53_device *dev = ds->priv; > + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; > + struct sk_buff *clone; > + unsigned int type; > + > + type = ptp_classify_raw(skb); > + > + if (type != PTP_CLASS_V2_L2) > + return; > + > + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) > + return; > + > + clone = skb_clone_sk(skb); > + if (!clone) > + return; > + > + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { Is it ok if you simply don't timestamp a second skb which may be sent while the first one is in flight, I wonder? What PTP profiles have you tested with? At just one PTP packet at a time, the switch isn't giving you a lot. Is it a hardware limitation? > + kfree_skb(clone); > + return; > + } > + > + ps->tx_skb = clone; > + ps->tx_tstamp_start = jiffies; > +} > +EXPORT_SYMBOL(b53_port_txtstamp); > + > bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, > unsigned int type) > { > @@ -136,6 +187,8 @@ EXPORT_SYMBOL(b53_port_rxtstamp); > > int b53_ptp_init(struct b53_device *dev) > { > + struct dsa_port *dp; > + > mutex_init(&dev->ptp_mutex); > > /* Enable BroadSync HD for all ports */ > @@ -191,6 +244,9 @@ int b53_ptp_init(struct b53_device *dev) > > ptp_schedule_worker(dev->ptp_clock, 0); > > + dsa_switch_for_each_port(dp, dev->ds) > + dp->priv = &dev->ports[dp->index].port_hwtstamp; > + > return 0; > } > EXPORT_SYMBOL(b53_ptp_init); > diff --git a/drivers/net/dsa/b53/b53_ptp.h b/drivers/net/dsa/b53/b53_ptp.h > index 3b3437870c55..f888f0a2022a 100644 > --- a/drivers/net/dsa/b53/b53_ptp.h > +++ b/drivers/net/dsa/b53/b53_ptp.h > @@ -10,6 +10,7 @@ > #include "b53_priv.h" > > #define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb)) > +#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) > > #ifdef CONFIG_B53_PTP > int b53_ptp_init(struct b53_device *dev); > @@ -18,6 +19,8 @@ int b53_get_ts_info(struct dsa_switch *ds, int port, > struct ethtool_ts_info *info); > bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, > unsigned int type); > +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); > + > #else /* !CONFIG_B53_PTP */ > > static inline int b53_ptp_init(struct b53_device *dev) > @@ -41,5 +44,10 @@ static inline bool b53_port_rxtstamp(struct dsa_switch *ds, int port, > return false; > } > > +static inline void b53_port_txtstamp(struct dsa_switch *ds, int port, > + struct sk_buff *skb) > +{ > +} > + > #endif > #endif > diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c > index d611c1073deb..a44ac81fa097 100644 > --- a/net/dsa/tag_brcm.c > +++ b/net/dsa/tag_brcm.c > @@ -8,6 +8,7 @@ > #include <linux/dsa/brcm.h> > #include <linux/etherdevice.h> > #include <linux/list.h> > +#include <linux/ptp_classify.h> > #include <linux/slab.h> > #include <linux/dsa/b53.h> > > @@ -76,6 +77,7 @@ > #define BRCM_EG_TC_SHIFT 5 > #define BRCM_EG_TC_MASK 0x7 > #define BRCM_EG_PID_MASK 0x1f > +#define BRCM_EG_T_R 0x20 > > #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) || \ > IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) > @@ -85,6 +87,8 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > unsigned int offset) > { > struct dsa_port *dp = dsa_slave_to_port(dev); > + unsigned int type = ptp_classify_raw(skb); > + struct b53_port_hwtstamp *ps = dp->priv; > u16 queue = skb_get_queue_mapping(skb); > u8 *brcm_tag; > > @@ -112,7 +116,13 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > */ > brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | > ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); > + > brcm_tag[1] = 0; > + > + if (type == PTP_CLASS_V2_L2 && > + test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) > + brcm_tag[1] = 1 << BRCM_IG_TS_SHIFT; > + > brcm_tag[2] = 0; > if (dp->index == 8) > brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; > @@ -126,6 +136,33 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > return skb; > } > > +static int set_txtstamp(struct dsa_port *dp, > + int port, > + u64 ns) > +{ > + struct b53_device *b53_dev = dp->ds->priv; > + struct b53_port_hwtstamp *ps = dp->priv; > + struct skb_shared_hwtstamps shhwtstamps; > + struct sk_buff *tmp_skb; > + > + if (!ps->tx_skb) > + return 0; > + > + mutex_lock(&b53_dev->ptp_mutex); > + ns = timecounter_cyc2time(&b53_dev->tc, ns); > + mutex_unlock(&b53_dev->ptp_mutex); This is called from brcm_tag_rcv_ll which runs in softirq context. You can't take a sleeping mutex, sorry. Please test your patches with CONFIG_DEBUG_ATOMIC_SLEEP and friends (lockdep etc). > + > + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); > + shhwtstamps.hwtstamp = ns_to_ktime(ns); > + tmp_skb = ps->tx_skb; > + ps->tx_skb = NULL; > + > + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state); > + skb_complete_tx_timestamp(tmp_skb, &shhwtstamps); > + > + return 0; > +} > + > /* Frames with this tag have one of these two layouts: > * ----------------------------------- > * | MAC DA | MAC SA | 4b tag | Type | DSA_TAG_PROTO_BRCM > @@ -143,6 +180,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, > unsigned int offset, > int *tag_len) > { > + struct dsa_port *dp; > int source_port; > u8 *brcm_tag; > u32 tstamp; > @@ -177,6 +215,14 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, > if (!skb->dev) > return NULL; > > + /* Check whether this is a status frame */ > + if (unlikely(*tag_len == 8 && brcm_tag[3] & BRCM_EG_T_R)) { > + dp = dsa_slave_to_port(skb->dev); > + > + set_txtstamp(dp, source_port, tstamp); > + return NULL; > + } > + > /* Remove Broadcom tag and update checksum */ > skb_pull_rcsum(skb, *tag_len); > > -- > 2.20.1 >
Hi Vladimir, On Tue Nov 09 2021, Vladimir Oltean wrote: >> +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) >> +{ >> + struct b53_device *dev = ds->priv; >> + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; >> + struct sk_buff *clone; >> + unsigned int type; >> + >> + type = ptp_classify_raw(skb); >> + >> + if (type != PTP_CLASS_V2_L2) >> + return; >> + >> + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) >> + return; >> + >> + clone = skb_clone_sk(skb); >> + if (!clone) >> + return; >> + >> + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { > > Is it ok if you simply don't timestamp a second skb which may be sent > while the first one is in flight, I wonder? What PTP profiles have you > tested with? At just one PTP packet at a time, the switch isn't giving > you a lot. PTP only generates a couple of messages per second which need to be timestamped. Therefore, this behavior shouldn't be a problem. hellcreek (and mv88e6xxx) do the same thing, simply because the device can only hold only one Tx timestamp. If we'd allow more than one PTP packet in flight, there will be correlation problems. I've tested with default and gPTP profile without any problems. What PTP profiles do have in mind? > Is it a hardware limitation? Not for the b53. It will generate status frames for each to be timestamped packet. However, I don't see the need to allow more than one Tx packet per port to be timestamped at the moment. Thanks, Kurt
On Tue, Nov 09, 2021 at 10:50:08AM +0100, Martin Kaistra wrote: > In order to get the switch to generate a timestamp for a transmitted > packet, we need to set the TS bit in the BRCM tag. The switch will then > create a status frame, which gets send back to the cpu. > In b53_port_txtstamp() we put the skb into a waiting position. > > When a status frame is received, we extract the timestamp and put the time > according to our timecounter into the waiting skb. When > TX_TSTAMP_TIMEOUT is reached and we have no means to correctly get back > a full timestamp, we cancel the process. > > As the status frame doesn't contain a reference to the original packet, > only one packet with timestamp request can be sent at a time. > > Signed-off-by: Martin Kaistra <martin.kaistra@linutronix.de> > --- > drivers/net/dsa/b53/b53_common.c | 1 + > drivers/net/dsa/b53/b53_ptp.c | 56 ++++++++++++++++++++++++++++++++ > drivers/net/dsa/b53/b53_ptp.h | 8 +++++ > net/dsa/tag_brcm.c | 46 ++++++++++++++++++++++++++ > 4 files changed, 111 insertions(+) > > diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c > index a9408f9cd414..56a9de89b38b 100644 > --- a/drivers/net/dsa/b53/b53_common.c > +++ b/drivers/net/dsa/b53/b53_common.c > @@ -2301,6 +2301,7 @@ static const struct dsa_switch_ops b53_switch_ops = { > .port_change_mtu = b53_change_mtu, > .get_ts_info = b53_get_ts_info, > .port_rxtstamp = b53_port_rxtstamp, > + .port_txtstamp = b53_port_txtstamp, > }; > > struct b53_chip_data { > diff --git a/drivers/net/dsa/b53/b53_ptp.c b/drivers/net/dsa/b53/b53_ptp.c > index f8dd8d484d93..5567135ba8b9 100644 > --- a/drivers/net/dsa/b53/b53_ptp.c > +++ b/drivers/net/dsa/b53/b53_ptp.c > @@ -100,14 +100,65 @@ static long b53_hwtstamp_work(struct ptp_clock_info *ptp) > { > struct b53_device *dev = > container_of(ptp, struct b53_device, ptp_clock_info); > + struct dsa_switch *ds = dev->ds; > + int i; > > mutex_lock(&dev->ptp_mutex); > timecounter_read(&dev->tc); > mutex_unlock(&dev->ptp_mutex); > > + for (i = 0; i < ds->num_ports; i++) { > + struct b53_port_hwtstamp *ps; > + > + if (!dsa_is_user_port(ds, i)) > + continue; > + > + ps = &dev->ports[i].port_hwtstamp; > + > + if (test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state) && > + time_is_before_jiffies(ps->tx_tstamp_start + > + TX_TSTAMP_TIMEOUT)) { > + dev_err(dev->dev, > + "Timeout while waiting for Tx timestamp!\n"); > + dev_kfree_skb_any(ps->tx_skb); > + ps->tx_skb = NULL; > + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, > + &ps->state); > + } > + } > + > return B53_PTP_OVERFLOW_PERIOD; > } > > +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) > +{ > + struct b53_device *dev = ds->priv; > + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; > + struct sk_buff *clone; > + unsigned int type; > + > + type = ptp_classify_raw(skb); > + > + if (type != PTP_CLASS_V2_L2) > + return; > + > + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) > + return; > + > + clone = skb_clone_sk(skb); > + if (!clone) > + return; > + > + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { > + kfree_skb(clone); > + return; > + } > + > + ps->tx_skb = clone; > + ps->tx_tstamp_start = jiffies; > +} > +EXPORT_SYMBOL(b53_port_txtstamp); > + > bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, > unsigned int type) > { > @@ -136,6 +187,8 @@ EXPORT_SYMBOL(b53_port_rxtstamp); > > int b53_ptp_init(struct b53_device *dev) > { > + struct dsa_port *dp; > + > mutex_init(&dev->ptp_mutex); > > /* Enable BroadSync HD for all ports */ > @@ -191,6 +244,9 @@ int b53_ptp_init(struct b53_device *dev) > > ptp_schedule_worker(dev->ptp_clock, 0); > > + dsa_switch_for_each_port(dp, dev->ds) > + dp->priv = &dev->ports[dp->index].port_hwtstamp; > + > return 0; > } > EXPORT_SYMBOL(b53_ptp_init); > diff --git a/drivers/net/dsa/b53/b53_ptp.h b/drivers/net/dsa/b53/b53_ptp.h > index 3b3437870c55..f888f0a2022a 100644 > --- a/drivers/net/dsa/b53/b53_ptp.h > +++ b/drivers/net/dsa/b53/b53_ptp.h > @@ -10,6 +10,7 @@ > #include "b53_priv.h" > > #define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb)) > +#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) > > #ifdef CONFIG_B53_PTP > int b53_ptp_init(struct b53_device *dev); > @@ -18,6 +19,8 @@ int b53_get_ts_info(struct dsa_switch *ds, int port, > struct ethtool_ts_info *info); > bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, > unsigned int type); > +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); > + > #else /* !CONFIG_B53_PTP */ > > static inline int b53_ptp_init(struct b53_device *dev) > @@ -41,5 +44,10 @@ static inline bool b53_port_rxtstamp(struct dsa_switch *ds, int port, > return false; > } > > +static inline void b53_port_txtstamp(struct dsa_switch *ds, int port, > + struct sk_buff *skb) > +{ > +} > + > #endif > #endif > diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c > index d611c1073deb..a44ac81fa097 100644 > --- a/net/dsa/tag_brcm.c > +++ b/net/dsa/tag_brcm.c > @@ -8,6 +8,7 @@ > #include <linux/dsa/brcm.h> > #include <linux/etherdevice.h> > #include <linux/list.h> > +#include <linux/ptp_classify.h> > #include <linux/slab.h> > #include <linux/dsa/b53.h> > > @@ -76,6 +77,7 @@ > #define BRCM_EG_TC_SHIFT 5 > #define BRCM_EG_TC_MASK 0x7 > #define BRCM_EG_PID_MASK 0x1f > +#define BRCM_EG_T_R 0x20 > > #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) || \ > IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) > @@ -85,6 +87,8 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > unsigned int offset) > { > struct dsa_port *dp = dsa_slave_to_port(dev); > + unsigned int type = ptp_classify_raw(skb); > + struct b53_port_hwtstamp *ps = dp->priv; > u16 queue = skb_get_queue_mapping(skb); > u8 *brcm_tag; > > @@ -112,7 +116,13 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > */ > brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | > ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); > + > brcm_tag[1] = 0; > + > + if (type == PTP_CLASS_V2_L2 && > + test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) Introducing dp->priv must be one of the most stupid things I've ever done. I'm sorry for this, I'll try to remove it over the weekend. Here you are dereferencing dp->priv without checking for NULL. But you are only populating dp->priv if CONFIG_B53_PTP is enabled. So if a "malicious" user space program sends a PTP event packet without requesting timestamping, and PTP support is turned off, the kernel is toast. In other news, you should not blindly timestamp any packet that looks like PTP. You should look at (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP), AND (this is important) whether the HWTSTAMP_TX_ON ioctl flag was passed to _your_ driver. Otherwise, PHY timestamping will be broken and people will wonder why (it still is, currently, because DSA doesn't call skb_tx_timestamp(). but that is way easier to debug and fix than it would be to get timestamps from the unintended PHC). Or even PTP timestamping of a DSA switch cascaded beneath a b53 port, in a disjoint tree setup. Please take a look at chapter "3.2.4 Other caveats for MAC drivers" from Documentation/networking/timestamping.rst (a bit ironic, I know, I've been RTFM'ed with this same file in the same thread). > + brcm_tag[1] = 1 << BRCM_IG_TS_SHIFT; > + > brcm_tag[2] = 0; > if (dp->index == 8) > brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; > @@ -126,6 +136,33 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, > return skb; > } > > +static int set_txtstamp(struct dsa_port *dp, > + int port, > + u64 ns) > +{ > + struct b53_device *b53_dev = dp->ds->priv; > + struct b53_port_hwtstamp *ps = dp->priv; > + struct skb_shared_hwtstamps shhwtstamps; > + struct sk_buff *tmp_skb; > + > + if (!ps->tx_skb) > + return 0; > + > + mutex_lock(&b53_dev->ptp_mutex); > + ns = timecounter_cyc2time(&b53_dev->tc, ns); > + mutex_unlock(&b53_dev->ptp_mutex); > + > + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); > + shhwtstamps.hwtstamp = ns_to_ktime(ns); > + tmp_skb = ps->tx_skb; > + ps->tx_skb = NULL; > + > + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state); > + skb_complete_tx_timestamp(tmp_skb, &shhwtstamps); > + > + return 0; > +} > + > /* Frames with this tag have one of these two layouts: > * ----------------------------------- > * | MAC DA | MAC SA | 4b tag | Type | DSA_TAG_PROTO_BRCM > @@ -143,6 +180,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, > unsigned int offset, > int *tag_len) > { > + struct dsa_port *dp; > int source_port; > u8 *brcm_tag; > u32 tstamp; > @@ -177,6 +215,14 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, > if (!skb->dev) > return NULL; > > + /* Check whether this is a status frame */ > + if (unlikely(*tag_len == 8 && brcm_tag[3] & BRCM_EG_T_R)) { > + dp = dsa_slave_to_port(skb->dev); > + > + set_txtstamp(dp, source_port, tstamp); > + return NULL; > + } > + > /* Remove Broadcom tag and update checksum */ > skb_pull_rcsum(skb, *tag_len); > > -- > 2.20.1 >
Hi Kurt, On Wed, Nov 10, 2021 at 08:14:32AM +0100, Kurt Kanzenbach wrote: > Hi Vladimir, > > On Tue Nov 09 2021, Vladimir Oltean wrote: > >> +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) > >> +{ > >> + struct b53_device *dev = ds->priv; > >> + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; > >> + struct sk_buff *clone; > >> + unsigned int type; > >> + > >> + type = ptp_classify_raw(skb); > >> + > >> + if (type != PTP_CLASS_V2_L2) > >> + return; > >> + > >> + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) > >> + return; > >> + > >> + clone = skb_clone_sk(skb); > >> + if (!clone) > >> + return; > >> + > >> + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { > > > > Is it ok if you simply don't timestamp a second skb which may be sent > > while the first one is in flight, I wonder? What PTP profiles have you > > tested with? At just one PTP packet at a time, the switch isn't giving > > you a lot. > > PTP only generates a couple of messages per second which need to be > timestamped. Therefore, this behavior shouldn't be a problem. > > hellcreek (and mv88e6xxx) do the same thing, simply because the device > can only hold only one Tx timestamp. If we'd allow more than one PTP > packet in flight, there will be correlation problems. I've tested with > default and gPTP profile without any problems. What PTP profiles do have > in mind? First of all, let's separate "more than one packet in flight" at the hardware/driver level vs user space level. Even if there is any hardware requirement to not request TX timestamping for the 2nd frame until the 1st has been acked, that shouldn't necessarily have an implication upon what user space sees. After all, we don't tell user space anything about the realities of the hardware it's running on. So it is true that ptp4l is single threaded and always polls synchronously for the reception of a TX timestamp on the error queue before proceeding to do anything else. But writing a kernel driver to the specification of a single user space program is questionable. Especially with the SOF_TIMESTAMPING_OPT_ID flag of the SO_TIMESTAMPING socket option, it is quite possible to write a different PTP stack that handles TX timestamps differently. It sends event messages on their respective timer expiry (sync, peer delay request, whatever), and processes TX timestamps as they come, asynchronously instead of blocking. That other PTP stack would not work reliably with this driver (or with mv88e6xxx, or with hellcreek). > > Is it a hardware limitation? > > Not for the b53. It will generate status frames for each to be > timestamped packet. However, I don't see the need to allow more than one > Tx packet per port to be timestamped at the moment. > > Thanks, > Kurt
On Wed, Nov 10, 2021 at 03:05:45PM +0200, Vladimir Oltean wrote: > Hi Kurt, > > On Wed, Nov 10, 2021 at 08:14:32AM +0100, Kurt Kanzenbach wrote: > > Hi Vladimir, > > > > On Tue Nov 09 2021, Vladimir Oltean wrote: > > >> +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) > > >> +{ > > >> + struct b53_device *dev = ds->priv; > > >> + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; > > >> + struct sk_buff *clone; > > >> + unsigned int type; > > >> + > > >> + type = ptp_classify_raw(skb); > > >> + > > >> + if (type != PTP_CLASS_V2_L2) > > >> + return; > > >> + > > >> + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) > > >> + return; > > >> + > > >> + clone = skb_clone_sk(skb); > > >> + if (!clone) > > >> + return; > > >> + > > >> + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { > > > > > > Is it ok if you simply don't timestamp a second skb which may be sent > > > while the first one is in flight, I wonder? What PTP profiles have you > > > tested with? At just one PTP packet at a time, the switch isn't giving > > > you a lot. > > > > PTP only generates a couple of messages per second which need to be > > timestamped. Therefore, this behavior shouldn't be a problem. > > > > hellcreek (and mv88e6xxx) do the same thing, simply because the device > > can only hold only one Tx timestamp. If we'd allow more than one PTP > > packet in flight, there will be correlation problems. I've tested with > > default and gPTP profile without any problems. What PTP profiles do have > > in mind? > > First of all, let's separate "more than one packet in flight" at the > hardware/driver level vs user space level. Even if there is any hardware > requirement to not request TX timestamping for the 2nd frame until the > 1st has been acked, that shouldn't necessarily have an implication upon > what user space sees. After all, we don't tell user space anything about > the realities of the hardware it's running on. > > So it is true that ptp4l is single threaded and always polls > synchronously for the reception of a TX timestamp on the error queue > before proceeding to do anything else. But writing a kernel driver to > the specification of a single user space program is questionable. > Especially with the SOF_TIMESTAMPING_OPT_ID flag of the SO_TIMESTAMPING > socket option, it is quite possible to write a different PTP stack that > handles TX timestamps differently. It sends event messages on their > respective timer expiry (sync, peer delay request, whatever), and > processes TX timestamps as they come, asynchronously instead of blocking. > That other PTP stack would not work reliably with this driver (or with > mv88e6xxx, or with hellcreek). Another example that may be closer to you is using vclocks and multiple ptp4l instances in multiple domains, over the same ports. Since ptp4l doesn't claim exclusive ownership of an interface, there you don't even need to have a different stack to see issues with the timestamps of one ptp4l instance getting dropped just because a different one happened to send an event message at the same time. > > > Is it a hardware limitation? > > > > Not for the b53. It will generate status frames for each to be > > timestamped packet. However, I don't see the need to allow more than one > > Tx packet per port to be timestamped at the moment. > > > > Thanks, > > Kurt > >
On Wed Nov 10 2021, Vladimir Oltean wrote: > Hi Kurt, > > On Wed, Nov 10, 2021 at 08:14:32AM +0100, Kurt Kanzenbach wrote: >> Hi Vladimir, >> >> On Tue Nov 09 2021, Vladimir Oltean wrote: >> >> +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) >> >> +{ >> >> + struct b53_device *dev = ds->priv; >> >> + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; >> >> + struct sk_buff *clone; >> >> + unsigned int type; >> >> + >> >> + type = ptp_classify_raw(skb); >> >> + >> >> + if (type != PTP_CLASS_V2_L2) >> >> + return; >> >> + >> >> + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) >> >> + return; >> >> + >> >> + clone = skb_clone_sk(skb); >> >> + if (!clone) >> >> + return; >> >> + >> >> + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { >> > >> > Is it ok if you simply don't timestamp a second skb which may be sent >> > while the first one is in flight, I wonder? What PTP profiles have you >> > tested with? At just one PTP packet at a time, the switch isn't giving >> > you a lot. >> >> PTP only generates a couple of messages per second which need to be >> timestamped. Therefore, this behavior shouldn't be a problem. >> >> hellcreek (and mv88e6xxx) do the same thing, simply because the device >> can only hold only one Tx timestamp. If we'd allow more than one PTP >> packet in flight, there will be correlation problems. I've tested with >> default and gPTP profile without any problems. What PTP profiles do have >> in mind? > > First of all, let's separate "more than one packet in flight" at the > hardware/driver level vs user space level. Even if there is any hardware > requirement to not request TX timestamping for the 2nd frame until the > 1st has been acked, that shouldn't necessarily have an implication upon > what user space sees. After all, we don't tell user space anything about > the realities of the hardware it's running on. Fair enough. > > So it is true that ptp4l is single threaded and always polls > synchronously for the reception of a TX timestamp on the error queue > before proceeding to do anything else. But writing a kernel driver to > the specification of a single user space program is questionable. > Especially with the SOF_TIMESTAMPING_OPT_ID flag of the SO_TIMESTAMPING > socket option, it is quite possible to write a different PTP stack that > handles TX timestamps differently. It sends event messages on their > respective timer expiry (sync, peer delay request, whatever), and > processes TX timestamps as they come, asynchronously instead of blocking. > That other PTP stack would not work reliably with this driver (or with > mv88e6xxx, or with hellcreek). Yeah, a PTP stack which e.g. runs delay measurements independently from the other tasks (sync, announce, ...) may run into trouble with such as an implementation. I'm wondering how would you solve that problem for hardware such as hellcreek? If a packet for timestamping is "in-flight" the Tx path for a concurrent frame would have to be delayed. That might be a surprise to the user as well. Thanks, Kurt
On Wed, Nov 10, 2021 at 02:47:19PM +0100, Kurt Kanzenbach wrote: > On Wed Nov 10 2021, Vladimir Oltean wrote: > > Hi Kurt, > > > > On Wed, Nov 10, 2021 at 08:14:32AM +0100, Kurt Kanzenbach wrote: > >> Hi Vladimir, > >> > >> On Tue Nov 09 2021, Vladimir Oltean wrote: > >> >> +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) > >> >> +{ > >> >> + struct b53_device *dev = ds->priv; > >> >> + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; > >> >> + struct sk_buff *clone; > >> >> + unsigned int type; > >> >> + > >> >> + type = ptp_classify_raw(skb); > >> >> + > >> >> + if (type != PTP_CLASS_V2_L2) > >> >> + return; > >> >> + > >> >> + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) > >> >> + return; > >> >> + > >> >> + clone = skb_clone_sk(skb); > >> >> + if (!clone) > >> >> + return; > >> >> + > >> >> + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { > >> > > >> > Is it ok if you simply don't timestamp a second skb which may be sent > >> > while the first one is in flight, I wonder? What PTP profiles have you > >> > tested with? At just one PTP packet at a time, the switch isn't giving > >> > you a lot. > >> > >> PTP only generates a couple of messages per second which need to be > >> timestamped. Therefore, this behavior shouldn't be a problem. > >> > >> hellcreek (and mv88e6xxx) do the same thing, simply because the device > >> can only hold only one Tx timestamp. If we'd allow more than one PTP > >> packet in flight, there will be correlation problems. I've tested with > >> default and gPTP profile without any problems. What PTP profiles do have > >> in mind? > > > > First of all, let's separate "more than one packet in flight" at the > > hardware/driver level vs user space level. Even if there is any hardware > > requirement to not request TX timestamping for the 2nd frame until the > > 1st has been acked, that shouldn't necessarily have an implication upon > > what user space sees. After all, we don't tell user space anything about > > the realities of the hardware it's running on. > > Fair enough. > > > > > So it is true that ptp4l is single threaded and always polls > > synchronously for the reception of a TX timestamp on the error queue > > before proceeding to do anything else. But writing a kernel driver to > > the specification of a single user space program is questionable. > > Especially with the SOF_TIMESTAMPING_OPT_ID flag of the SO_TIMESTAMPING > > socket option, it is quite possible to write a different PTP stack that > > handles TX timestamps differently. It sends event messages on their > > respective timer expiry (sync, peer delay request, whatever), and > > processes TX timestamps as they come, asynchronously instead of blocking. > > That other PTP stack would not work reliably with this driver (or with > > mv88e6xxx, or with hellcreek). > > Yeah, a PTP stack which e.g. runs delay measurements independently from > the other tasks (sync, announce, ...) may run into trouble with such as > an implementation. I'm wondering how would you solve that problem for > hardware such as hellcreek? If a packet for timestamping is "in-flight" > the Tx path for a concurrent frame would have to be delayed. That might > be a surprise to the user as well. Yes, of course it would have to be delayed. But it would at least see the timestamp, on the other hand... Christian Eggers, while working on the mythical ksz9477 PTP support, had the same kind of hardware to work with, and he unconditionally deferred PTP packets, then in a kthread owned by the driver (not the tagger), he initialized a completion structure, sent the packet, slept until the completion was done (signaled by an irq thread in his case, can be signaled by the tagger in Martin's case), and proceeded with the next packet. See ksz9477_defer_xmit() here: https://patchwork.ozlabs.org/project/netdev/patch/20201203102117.8995-9-ceggers@arri.de/ It might look like I'm trying really hard to sell deferred xmit here, and that's not the case - I really hate it myself. But when putting two and two together, it really seems to be what is needed.
On Wed, Nov 10, 2021 at 03:05:45PM +0200, Vladimir Oltean wrote: > So it is true that ptp4l is single threaded and always polls > synchronously for the reception of a TX timestamp on the error queue > before proceeding to do anything else. But writing a kernel driver to > the specification of a single user space program is questionable. There are a number of HW devices on the market that only support one outstanding Tx time stamp. The implementation of ptp4l follows this limitation because a) it allows ptp4l to "just work" with most HW, and b) there is as yet no practical advantage to asynchronous Tx time stamping. The premise of (b) might change if you had a GM serving hundreds or thousands of unicast clients, for example. In any case, I agree that the driver should enable the capabilities of the HW and not impose artificial limitations. Thanks, Richard
On Wed, Nov 10, 2021 at 07:08:55AM -0800, Richard Cochran wrote: > In any case, I agree that the driver should enable the capabilities of > the HW and not impose artificial limitations. Which was not my actual point. We were discussing whether the kernel should attempt to take a 2-step TX timestamp if a different one is outstanding, be it from the same socket or from a different one. Whether the hardware supports multiple concurrent TX timestamps is orthogonal to that.
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a9408f9cd414..56a9de89b38b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2301,6 +2301,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_change_mtu = b53_change_mtu, .get_ts_info = b53_get_ts_info, .port_rxtstamp = b53_port_rxtstamp, + .port_txtstamp = b53_port_txtstamp, }; struct b53_chip_data { diff --git a/drivers/net/dsa/b53/b53_ptp.c b/drivers/net/dsa/b53/b53_ptp.c index f8dd8d484d93..5567135ba8b9 100644 --- a/drivers/net/dsa/b53/b53_ptp.c +++ b/drivers/net/dsa/b53/b53_ptp.c @@ -100,14 +100,65 @@ static long b53_hwtstamp_work(struct ptp_clock_info *ptp) { struct b53_device *dev = container_of(ptp, struct b53_device, ptp_clock_info); + struct dsa_switch *ds = dev->ds; + int i; mutex_lock(&dev->ptp_mutex); timecounter_read(&dev->tc); mutex_unlock(&dev->ptp_mutex); + for (i = 0; i < ds->num_ports; i++) { + struct b53_port_hwtstamp *ps; + + if (!dsa_is_user_port(ds, i)) + continue; + + ps = &dev->ports[i].port_hwtstamp; + + if (test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state) && + time_is_before_jiffies(ps->tx_tstamp_start + + TX_TSTAMP_TIMEOUT)) { + dev_err(dev->dev, + "Timeout while waiting for Tx timestamp!\n"); + dev_kfree_skb_any(ps->tx_skb); + ps->tx_skb = NULL; + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, + &ps->state); + } + } + return B53_PTP_OVERFLOW_PERIOD; } +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) +{ + struct b53_device *dev = ds->priv; + struct b53_port_hwtstamp *ps = &dev->ports[port].port_hwtstamp; + struct sk_buff *clone; + unsigned int type; + + type = ptp_classify_raw(skb); + + if (type != PTP_CLASS_V2_L2) + return; + + if (!test_bit(B53_HWTSTAMP_ENABLED, &ps->state)) + return; + + clone = skb_clone_sk(skb); + if (!clone) + return; + + if (test_and_set_bit_lock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) { + kfree_skb(clone); + return; + } + + ps->tx_skb = clone; + ps->tx_tstamp_start = jiffies; +} +EXPORT_SYMBOL(b53_port_txtstamp); + bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type) { @@ -136,6 +187,8 @@ EXPORT_SYMBOL(b53_port_rxtstamp); int b53_ptp_init(struct b53_device *dev) { + struct dsa_port *dp; + mutex_init(&dev->ptp_mutex); /* Enable BroadSync HD for all ports */ @@ -191,6 +244,9 @@ int b53_ptp_init(struct b53_device *dev) ptp_schedule_worker(dev->ptp_clock, 0); + dsa_switch_for_each_port(dp, dev->ds) + dp->priv = &dev->ports[dp->index].port_hwtstamp; + return 0; } EXPORT_SYMBOL(b53_ptp_init); diff --git a/drivers/net/dsa/b53/b53_ptp.h b/drivers/net/dsa/b53/b53_ptp.h index 3b3437870c55..f888f0a2022a 100644 --- a/drivers/net/dsa/b53/b53_ptp.h +++ b/drivers/net/dsa/b53/b53_ptp.h @@ -10,6 +10,7 @@ #include "b53_priv.h" #define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb)) +#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) #ifdef CONFIG_B53_PTP int b53_ptp_init(struct b53_device *dev); @@ -18,6 +19,8 @@ int b53_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *info); bool b53_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); +void b53_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); + #else /* !CONFIG_B53_PTP */ static inline int b53_ptp_init(struct b53_device *dev) @@ -41,5 +44,10 @@ static inline bool b53_port_rxtstamp(struct dsa_switch *ds, int port, return false; } +static inline void b53_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb) +{ +} + #endif #endif diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index d611c1073deb..a44ac81fa097 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -8,6 +8,7 @@ #include <linux/dsa/brcm.h> #include <linux/etherdevice.h> #include <linux/list.h> +#include <linux/ptp_classify.h> #include <linux/slab.h> #include <linux/dsa/b53.h> @@ -76,6 +77,7 @@ #define BRCM_EG_TC_SHIFT 5 #define BRCM_EG_TC_MASK 0x7 #define BRCM_EG_PID_MASK 0x1f +#define BRCM_EG_T_R 0x20 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) || \ IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) @@ -85,6 +87,8 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, unsigned int offset) { struct dsa_port *dp = dsa_slave_to_port(dev); + unsigned int type = ptp_classify_raw(skb); + struct b53_port_hwtstamp *ps = dp->priv; u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; @@ -112,7 +116,13 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, */ brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); + brcm_tag[1] = 0; + + if (type == PTP_CLASS_V2_L2 && + test_bit(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) + brcm_tag[1] = 1 << BRCM_IG_TS_SHIFT; + brcm_tag[2] = 0; if (dp->index == 8) brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; @@ -126,6 +136,33 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, return skb; } +static int set_txtstamp(struct dsa_port *dp, + int port, + u64 ns) +{ + struct b53_device *b53_dev = dp->ds->priv; + struct b53_port_hwtstamp *ps = dp->priv; + struct skb_shared_hwtstamps shhwtstamps; + struct sk_buff *tmp_skb; + + if (!ps->tx_skb) + return 0; + + mutex_lock(&b53_dev->ptp_mutex); + ns = timecounter_cyc2time(&b53_dev->tc, ns); + mutex_unlock(&b53_dev->ptp_mutex); + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + tmp_skb = ps->tx_skb; + ps->tx_skb = NULL; + + clear_bit_unlock(B53_HWTSTAMP_TX_IN_PROGRESS, &ps->state); + skb_complete_tx_timestamp(tmp_skb, &shhwtstamps); + + return 0; +} + /* Frames with this tag have one of these two layouts: * ----------------------------------- * | MAC DA | MAC SA | 4b tag | Type | DSA_TAG_PROTO_BRCM @@ -143,6 +180,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, unsigned int offset, int *tag_len) { + struct dsa_port *dp; int source_port; u8 *brcm_tag; u32 tstamp; @@ -177,6 +215,14 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, if (!skb->dev) return NULL; + /* Check whether this is a status frame */ + if (unlikely(*tag_len == 8 && brcm_tag[3] & BRCM_EG_T_R)) { + dp = dsa_slave_to_port(skb->dev); + + set_txtstamp(dp, source_port, tstamp); + return NULL; + } + /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, *tag_len);
In order to get the switch to generate a timestamp for a transmitted packet, we need to set the TS bit in the BRCM tag. The switch will then create a status frame, which gets send back to the cpu. In b53_port_txtstamp() we put the skb into a waiting position. When a status frame is received, we extract the timestamp and put the time according to our timecounter into the waiting skb. When TX_TSTAMP_TIMEOUT is reached and we have no means to correctly get back a full timestamp, we cancel the process. As the status frame doesn't contain a reference to the original packet, only one packet with timestamp request can be sent at a time. Signed-off-by: Martin Kaistra <martin.kaistra@linutronix.de> --- drivers/net/dsa/b53/b53_common.c | 1 + drivers/net/dsa/b53/b53_ptp.c | 56 ++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_ptp.h | 8 +++++ net/dsa/tag_brcm.c | 46 ++++++++++++++++++++++++++ 4 files changed, 111 insertions(+)