diff mbox series

[net-next] net: micrel: Change to receive timestamp in the frame for lan8841

Message ID 20230607070948.1746768-1-horatiu.vultur@microchip.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next] net: micrel: Change to receive timestamp in the frame for lan8841 | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Horatiu Vultur June 7, 2023, 7:09 a.m. UTC
Currently for each timestamp frame, the SW needs to go and read the
received timestamp over the MDIO bus. But the HW has the capability
to store the received nanoseconds part and the least significant two
bits of the seconds in the reserved field of the PTP header. In this
way we could save few mdio transactions (actually a little more
transactions because the access to the PTP registers are indirect)
for each received frame. But is still required to read the seconds
part of each received frame.
Doing these changes to start to get the received timestamp in the
reserved field of the header, will give a great CPU usage performance.
Running ptp4l with logSyncInterval of -9 will give a ~50% CPU
improvment.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
---
 drivers/net/phy/micrel.c | 231 +++++++++++++++++++++++----------------
 1 file changed, 139 insertions(+), 92 deletions(-)

Comments

Richard Cochran June 7, 2023, 5:47 p.m. UTC | #1
On Wed, Jun 07, 2023 at 09:09:48AM +0200, Horatiu Vultur wrote:

> Doing these changes to start to get the received timestamp in the
> reserved field of the header, will give a great CPU usage performance.
> Running ptp4l with logSyncInterval of -9 will give a ~50% CPU
> improvment.

Really?

> -static struct lan8814_ptp_rx_ts *lan8841_ptp_get_rx_ts(struct kszphy_ptp_priv *ptp_priv)
> -{
> -	struct phy_device *phydev = ptp_priv->phydev;
> -	struct lan8814_ptp_rx_ts *rx_ts;
> -	u32 sec, nsec;
> -	u16 seq;
> -
> -	nsec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_HI);
> -	if (!(nsec & LAN8841_PTP_RX_INGRESS_NSEC_HI_VALID))
> -		return NULL;
> -
> -	nsec = ((nsec & 0x3fff) << 16);
> -	nsec = nsec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_LO);
> -
> -	sec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_HI);
> -	sec = sec << 16;
> -	sec = sec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_LO);
> -
> -	seq = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_MSG_HEADER2);

Before: 5x phy_read_mmd() per frame ...

> -	rx_ts = kzalloc(sizeof(*rx_ts), GFP_KERNEL);
> -	if (!rx_ts)
> -		return NULL;
> -
> -	rx_ts->seconds = sec;
> -	rx_ts->nsec = nsec;
> -	rx_ts->seq_id = seq;
> -
> -	return rx_ts;
> -}

> +static void lan8841_ptp_getseconds(struct ptp_clock_info *ptp,
> +				   struct timespec64 *ts)
> +{
> +	struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> +							ptp_clock_info);
> +	struct phy_device *phydev = ptp_priv->phydev;
> +	time64_t s;
> +
> +	mutex_lock(&ptp_priv->ptp_lock);
> +	/* Issue the command to read the LTC */
> +	phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
> +		      LAN8841_PTP_CMD_CTL_PTP_LTC_READ);
> +
> +	/* Read the LTC */
> +	s = phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_HI);
> +	s <<= 16;
> +	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_MID);
> +	s <<= 16;
> +	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_LO);

After: 4x phy_read_mmd() per frame.  How does that save 50% cpu?

> +	mutex_unlock(&ptp_priv->ptp_lock);
> +
> +	set_normalized_timespec64(ts, s, 0);
> +}


> +static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
> +{
> +	struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> +							ptp_clock_info);
> +	struct skb_shared_hwtstamps *shhwtstamps;
> +	struct timespec64 ts;
> +	struct sk_buff *skb;
> +	u32 ts_header;
> +
> +	while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
> +		lan8841_ptp_getseconds(ptp, &ts);

No need to call this once per frame.  It would be sufficent to call it
once every 2 seconds and cache the result.

> +		ts_header = __be32_to_cpu(LAN8841_SKB_CB(skb)->header->reserved2);
> +
> +		shhwtstamps = skb_hwtstamps(skb);
> +		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
> +
> +		/* Check for any wrap arounds for the second part */
> +		if ((ts.tv_sec & GENMASK(1, 0)) < ts_header >> 30)
> +			ts.tv_sec -= GENMASK(1, 0) + 1;
> +
> +		shhwtstamps->hwtstamp =
> +			ktime_set((ts.tv_sec & ~(GENMASK(1, 0))) | ts_header >> 30,
> +				  ts_header & GENMASK(29, 0));
> +		LAN8841_SKB_CB(skb)->header->reserved2 = 0;
> +
> +		netif_rx(skb);
> +	}
> +
> +	return -1;
> +}

Thanks,
Richard
Richard Cochran June 8, 2023, 5:51 a.m. UTC | #2
On Wed, Jun 07, 2023 at 10:47:16AM -0700, Richard Cochran wrote:
> On Wed, Jun 07, 2023 at 09:09:48AM +0200, Horatiu Vultur wrote:
> > +static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
> > +{
> > +	struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> > +							ptp_clock_info);
> > +	struct skb_shared_hwtstamps *shhwtstamps;
> > +	struct timespec64 ts;
> > +	struct sk_buff *skb;
> > +	u32 ts_header;
> > +
> > +	while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
> > +		lan8841_ptp_getseconds(ptp, &ts);
> 
> No need to call this once per frame.  It would be sufficent to call it
> once every 2 seconds and cache the result.

Okay, this is tricky.

- If you call lan8841_ptp_getseconds() after gathering the received
  frames, then the frame timestamps are clearly in the past WRT the
  call to getseconds.  That makes the wrap check simpler.  But the
  getseconds call really should be placed before the 'while' loop.

- If the Rx frame rate exceeds 1/second, then it would be more
  efficient to call getseconds every second, and cache the result.
  But then the wrap around check needs to account for the fact that
  the cached value may have occurred either before or after the frame
  timestamp.

I'll explain that second point when my brain wakes again...

Thanks,
Richard
Horatiu Vultur June 8, 2023, 9:05 a.m. UTC | #3
Hi Richard,

The 06/07/2023 10:47, Richard Cochran wrote:
> 
> On Wed, Jun 07, 2023 at 09:09:48AM +0200, Horatiu Vultur wrote:
> 
> > Doing these changes to start to get the received timestamp in the
> > reserved field of the header, will give a great CPU usage performance.
> > Running ptp4l with logSyncInterval of -9 will give a ~50% CPU
> > improvment.
> 
> Really?

I have run a simple top on the PC while running ptp4l.

This is the output without this patch:
  PID  PPID USER     STAT   VSZ %VSZ %CPU COMMAND
  136     2 root     DW       0   0%  58% [irq/33-e2004118]
  141   133 root     R     2232   0%  23% ptp4l -f /tmp/linux.cfg

And this is the output with the patch:
  142   134 root     S     2232   0%  15% ptp4l -f /tmp/linux.cfg
   36     2 root     DW       0   0%  15% [ptp0]
  137     2 root     SW       0   0%   0% [irq/33-e2004118]

If you think I should do better measurements, I am open to suggestions.

> 
> > -static struct lan8814_ptp_rx_ts *lan8841_ptp_get_rx_ts(struct kszphy_ptp_priv *ptp_priv)
> > -{
> > -     struct phy_device *phydev = ptp_priv->phydev;
> > -     struct lan8814_ptp_rx_ts *rx_ts;
> > -     u32 sec, nsec;
> > -     u16 seq;
> > -
> > -     nsec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_HI);
> > -     if (!(nsec & LAN8841_PTP_RX_INGRESS_NSEC_HI_VALID))
> > -             return NULL;
> > -
> > -     nsec = ((nsec & 0x3fff) << 16);
> > -     nsec = nsec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_LO);
> > -
> > -     sec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_HI);
> > -     sec = sec << 16;
> > -     sec = sec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_LO);
> > -
> > -     seq = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_MSG_HEADER2);
> 
> Before: 5x phy_read_mmd() per frame ...
> 
> > -     rx_ts = kzalloc(sizeof(*rx_ts), GFP_KERNEL);
> > -     if (!rx_ts)
> > -             return NULL;
> > -
> > -     rx_ts->seconds = sec;
> > -     rx_ts->nsec = nsec;
> > -     rx_ts->seq_id = seq;
> > -
> > -     return rx_ts;
> > -}
> 
> > +static void lan8841_ptp_getseconds(struct ptp_clock_info *ptp,
> > +                                struct timespec64 *ts)
> > +{
> > +     struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> > +                                                     ptp_clock_info);
> > +     struct phy_device *phydev = ptp_priv->phydev;
> > +     time64_t s;
> > +
> > +     mutex_lock(&ptp_priv->ptp_lock);
> > +     /* Issue the command to read the LTC */
> > +     phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
> > +                   LAN8841_PTP_CMD_CTL_PTP_LTC_READ);
> > +
> > +     /* Read the LTC */
> > +     s = phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_HI);
> > +     s <<= 16;
> > +     s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_MID);
> > +     s <<= 16;
> > +     s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_LO);
> 
> After: 4x phy_read_mmd() per frame.  How does that save 50% cpu?
> 
> > +     mutex_unlock(&ptp_priv->ptp_lock);
> > +
> > +     set_normalized_timespec64(ts, s, 0);
> > +}
> 
> 
> > +static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
> > +{
> > +     struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> > +                                                     ptp_clock_info);
> > +     struct skb_shared_hwtstamps *shhwtstamps;
> > +     struct timespec64 ts;
> > +     struct sk_buff *skb;
> > +     u32 ts_header;
> > +
> > +     while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
> > +             lan8841_ptp_getseconds(ptp, &ts);
> 
> No need to call this once per frame.  It would be sufficent to call it
> once every 2 seconds and cache the result.
> 
> > +             ts_header = __be32_to_cpu(LAN8841_SKB_CB(skb)->header->reserved2);
> > +
> > +             shhwtstamps = skb_hwtstamps(skb);
> > +             memset(shhwtstamps, 0, sizeof(*shhwtstamps));
> > +
> > +             /* Check for any wrap arounds for the second part */
> > +             if ((ts.tv_sec & GENMASK(1, 0)) < ts_header >> 30)
> > +                     ts.tv_sec -= GENMASK(1, 0) + 1;
> > +
> > +             shhwtstamps->hwtstamp =
> > +                     ktime_set((ts.tv_sec & ~(GENMASK(1, 0))) | ts_header >> 30,
> > +                               ts_header & GENMASK(29, 0));
> > +             LAN8841_SKB_CB(skb)->header->reserved2 = 0;
> > +
> > +             netif_rx(skb);
> > +     }
> > +
> > +     return -1;
> > +}
> 
> Thanks,
> Richard
Horatiu Vultur June 8, 2023, 9:21 a.m. UTC | #4
The 06/07/2023 22:51, Richard Cochran wrote:
> 
> On Wed, Jun 07, 2023 at 10:47:16AM -0700, Richard Cochran wrote:
> > On Wed, Jun 07, 2023 at 09:09:48AM +0200, Horatiu Vultur wrote:
> > > +static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
> > > +{
> > > +   struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
> > > +                                                   ptp_clock_info);
> > > +   struct skb_shared_hwtstamps *shhwtstamps;
> > > +   struct timespec64 ts;
> > > +   struct sk_buff *skb;
> > > +   u32 ts_header;
> > > +
> > > +   while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
> > > +           lan8841_ptp_getseconds(ptp, &ts);
> >
> > No need to call this once per frame.  It would be sufficent to call it
> > once every 2 seconds and cache the result.

Funny thing is that I have already a patch for this, it is not 100%
complete, because of one of the possible issues that you mention below.
But in my case I read the seconds twice per second, regardless of the RX
frame rate.

> 
> Okay, this is tricky.
> 
> - If you call lan8841_ptp_getseconds() after gathering the received
>   frames, then the frame timestamps are clearly in the past WRT the
>   call to getseconds.  That makes the wrap check simpler.  But the
>   getseconds call really should be placed before the 'while' loop.
> 
> - If the Rx frame rate exceeds 1/second, then it would be more
>   efficient to call getseconds every second, and cache the result.
>   But then the wrap around check needs to account for the fact that
>   the cached value may have occurred either before or after the frame
>   timestamp.
> 
> I'll explain that second point when my brain wakes again...

I am looking forward for your explanation.

In the end don't you think it should be 2 different patches, first patch
which changes to mode to get the RX timestamp, while the second one
optimize this?

Bellow just for reference I added the patch which it tries to get the second
part twice per second.

---
Subject: [PATCH] net: micrel: Schedule work to read seconds for lan8841

Instead of reading the seconds part of the received frame for each of
the frames, schedule a workqueue to read the seconds part every 500ms
and then for each of the received frames use this information instead of
reading again the seconds part. Because if for example running with 512
frames per second, there is no point to read 512 times the second part.
Of course care needs to be taken in case of the less two significant
bits are 0 or 3, to make sure there are no seconds wraparound.
This will improve the CPU usage by ~20% and also it is possible to receive
1024 Sync frames per second.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
---
 drivers/net/phy/micrel.c | 49 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 28365006b2067..9832eea404377 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -33,6 +33,7 @@
 #include <linux/ptp_classify.h>
 #include <linux/net_tstamp.h>
 #include <linux/gpio/consumer.h>
+#include <linux/workqueue.h>
 
 /* Operation Mode Strap Override */
 #define MII_KSZPHY_OMSO				0x16
@@ -254,6 +255,9 @@
 #define PS_TO_REG				200
 #define FIFO_SIZE				8
 
+/* Delay used to get the second part from the LTC */
+#define LAN8841_GET_SEC_LTC_DELAY		(500 * NSEC_PER_MSEC)
+
 struct kszphy_hw_stat {
 	const char *string;
 	u8 reg;
@@ -321,6 +325,9 @@ struct kszphy_ptp_priv {
 	/* Lock for ptp_clock */
 	struct mutex ptp_lock;
 	struct ptp_pin_desc *pin_config;
+
+	s64 seconds;
+	struct delayed_work seconds_work;
 };
 
 struct kszphy_priv {
@@ -3840,6 +3847,12 @@ static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv,
 			       LAN8841_PTP_INSERT_TS_32BIT,
 			       LAN8841_PTP_INSERT_TS_EN |
 			       LAN8841_PTP_INSERT_TS_32BIT);
+
+		/* Schedule the work to read the seconds, which will be used in
+		 * the received timestamp
+		 */
+		schedule_delayed_work(&ptp_priv->seconds_work,
+				      nsecs_to_jiffies(LAN8841_GET_SEC_LTC_DELAY));
 	} else {
 		/* Disable interrupts on the TX side */
 		phy_modify_mmd(phydev, 2, LAN8841_PTP_INT_EN,
@@ -3853,6 +3866,11 @@ static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv,
 			       LAN8841_PTP_INSERT_TS_32BIT, 0);
 
 		ptp_cancel_worker_sync(ptp_priv->ptp_clock);
+
+		/* Stop the work, as there is no reason to continue to read the
+		 * seconds if there is no timestamping enabled
+		 */
+		cancel_delayed_work_sync(&ptp_priv->seconds_work);
 	}
 }
 
@@ -4062,6 +4080,8 @@ static int lan8841_ptp_settime64(struct ptp_clock_info *ptp,
 	phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_SET_NS_LO, lower_16_bits(ts->tv_nsec));
 	phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_SET_NS_HI, upper_16_bits(ts->tv_nsec) & 0x3fff);
 
+	ptp_priv->seconds = ts->tv_sec;
+
 	/* Set the command to load the LTC */
 	phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
 		      LAN8841_PTP_CMD_CTL_PTP_LTC_LOAD);
@@ -4116,7 +4136,6 @@ static void lan8841_ptp_getseconds(struct ptp_clock_info *ptp,
 	struct phy_device *phydev = ptp_priv->phydev;
 	time64_t s;
 
-	mutex_lock(&ptp_priv->ptp_lock);
 	/* Issue the command to read the LTC */
 	phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
 		      LAN8841_PTP_CMD_CTL_PTP_LTC_READ);
@@ -4127,7 +4146,6 @@ static void lan8841_ptp_getseconds(struct ptp_clock_info *ptp,
 	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_MID);
 	s <<= 16;
 	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_LO);
-	mutex_unlock(&ptp_priv->ptp_lock);
 
 	set_normalized_timespec64(ts, s, 0);
 }
@@ -4644,15 +4662,20 @@ static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
 	u32 ts_header;
 
 	while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
-		lan8841_ptp_getseconds(ptp, &ts);
+		mutex_lock(&ptp_priv->ptp_lock);
+		ts.tv_sec = ptp_priv->seconds;
+		mutex_unlock(&ptp_priv->ptp_lock);
+
 		ts_header = __be32_to_cpu(LAN8841_SKB_CB(skb)->header->reserved2);
 
 		shhwtstamps = skb_hwtstamps(skb);
 		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
 
 		/* Check for any wrap arounds for the second part */
-		if ((ts.tv_sec & GENMASK(1, 0)) < ts_header >> 30)
+		if ((ts.tv_sec & GENMASK(1, 0)) == 0 && (ts_header >> 30) == 3)
 			ts.tv_sec -= GENMASK(1, 0) + 1;
+		else if ((ts.tv_sec & GENMASK(1, 0)) == 3 && (ts_header >> 30) == 0)
+			ts.tv_sec += 1;
 
 		shhwtstamps->hwtstamp =
 			ktime_set((ts.tv_sec & ~(GENMASK(1, 0))) | ts_header >> 30,
@@ -4665,6 +4688,21 @@ static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
 	return -1;
 }
 
+static void lan8841_ptp_seconds_work(struct work_struct *seconds_work)
+{
+	struct kszphy_ptp_priv *ptp_priv =
+		container_of(seconds_work, struct kszphy_ptp_priv, seconds_work.work);
+	struct timespec64 ts;
+
+	mutex_lock(&ptp_priv->ptp_lock);
+	lan8841_ptp_getseconds(&ptp_priv->ptp_clock_info, &ts);
+	ptp_priv->seconds = ts.tv_sec;
+	mutex_unlock(&ptp_priv->ptp_lock);
+
+	schedule_delayed_work(&ptp_priv->seconds_work,
+			      nsecs_to_jiffies(LAN8841_GET_SEC_LTC_DELAY));
+}
+
 static struct ptp_clock_info lan8841_ptp_clock_info = {
 	.owner		= THIS_MODULE,
 	.name		= "lan8841 ptp",
@@ -4749,6 +4787,8 @@ static int lan8841_probe(struct phy_device *phydev)
 
 	phydev->mii_ts = &ptp_priv->mii_ts;
 
+	INIT_DELAYED_WORK(&ptp_priv->seconds_work, lan8841_ptp_seconds_work);
+
 	return 0;
 }
 
@@ -4758,6 +4798,7 @@ static int lan8841_suspend(struct phy_device *phydev)
 	struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv;
 
 	ptp_cancel_worker_sync(ptp_priv->ptp_clock);
+	cancel_delayed_work_sync(&ptp_priv->seconds_work);
 
 	return genphy_suspend(phydev);
 }
diff mbox series

Patch

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 0ff4fd9f1a183..28365006b2067 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -3502,6 +3502,9 @@  static int lan8814_probe(struct phy_device *phydev)
 #define LAN8841_PTP_CMD_CTL_PTP_RESET		BIT(0)
 #define LAN8841_PTP_RX_PARSE_CONFIG		368
 #define LAN8841_PTP_TX_PARSE_CONFIG		432
+#define LAN8841_PTP_RX_MODE			381
+#define LAN8841_PTP_INSERT_TS_EN		BIT(0)
+#define LAN8841_PTP_INSERT_TS_32BIT		BIT(1)
 
 static int lan8841_config_init(struct phy_device *phydev)
 {
@@ -3650,68 +3653,18 @@  static void lan8841_ptp_process_tx_ts(struct kszphy_ptp_priv *ptp_priv)
 		lan8814_match_tx_skb(ptp_priv, sec, nsec, seq);
 }
 
-#define LAN8841_PTP_RX_INGRESS_SEC_LO			389
-#define LAN8841_PTP_RX_INGRESS_SEC_HI			388
-#define LAN8841_PTP_RX_INGRESS_NS_LO			387
-#define LAN8841_PTP_RX_INGRESS_NS_HI			386
-#define LAN8841_PTP_RX_INGRESS_NSEC_HI_VALID		BIT(15)
-#define LAN8841_PTP_RX_MSG_HEADER2			391
-
-static struct lan8814_ptp_rx_ts *lan8841_ptp_get_rx_ts(struct kszphy_ptp_priv *ptp_priv)
-{
-	struct phy_device *phydev = ptp_priv->phydev;
-	struct lan8814_ptp_rx_ts *rx_ts;
-	u32 sec, nsec;
-	u16 seq;
-
-	nsec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_HI);
-	if (!(nsec & LAN8841_PTP_RX_INGRESS_NSEC_HI_VALID))
-		return NULL;
-
-	nsec = ((nsec & 0x3fff) << 16);
-	nsec = nsec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_NS_LO);
-
-	sec = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_HI);
-	sec = sec << 16;
-	sec = sec | phy_read_mmd(phydev, 2, LAN8841_PTP_RX_INGRESS_SEC_LO);
-
-	seq = phy_read_mmd(phydev, 2, LAN8841_PTP_RX_MSG_HEADER2);
-
-	rx_ts = kzalloc(sizeof(*rx_ts), GFP_KERNEL);
-	if (!rx_ts)
-		return NULL;
-
-	rx_ts->seconds = sec;
-	rx_ts->nsec = nsec;
-	rx_ts->seq_id = seq;
-
-	return rx_ts;
-}
-
-static void lan8841_ptp_process_rx_ts(struct kszphy_ptp_priv *ptp_priv)
-{
-	struct lan8814_ptp_rx_ts *rx_ts;
-
-	while ((rx_ts = lan8841_ptp_get_rx_ts(ptp_priv)) != NULL)
-		lan8814_match_rx_ts(ptp_priv, rx_ts);
-}
-
 #define LAN8841_PTP_INT_STS			259
 #define LAN8841_PTP_INT_STS_PTP_TX_TS_OVRFL_INT	BIT(13)
 #define LAN8841_PTP_INT_STS_PTP_TX_TS_INT	BIT(12)
-#define LAN8841_PTP_INT_STS_PTP_RX_TS_OVRFL_INT	BIT(9)
-#define LAN8841_PTP_INT_STS_PTP_RX_TS_INT	BIT(8)
 #define LAN8841_PTP_INT_STS_PTP_GPIO_CAP_INT	BIT(2)
 
-static void lan8841_ptp_flush_fifo(struct kszphy_ptp_priv *ptp_priv, bool egress)
+static void lan8841_ptp_flush_fifo(struct kszphy_ptp_priv *ptp_priv)
 {
 	struct phy_device *phydev = ptp_priv->phydev;
 	int i;
 
 	for (i = 0; i < FIFO_SIZE; ++i)
-		phy_read_mmd(phydev, 2,
-			     egress ? LAN8841_PTP_TX_MSG_HEADER2 :
-				      LAN8841_PTP_RX_MSG_HEADER2);
+		phy_read_mmd(phydev, 2, LAN8841_PTP_TX_MSG_HEADER2);
 
 	phy_read_mmd(phydev, 2, LAN8841_PTP_INT_STS);
 }
@@ -3789,23 +3742,17 @@  static void lan8841_handle_ptp_interrupt(struct phy_device *phydev)
 		if (status & LAN8841_PTP_INT_STS_PTP_TX_TS_INT)
 			lan8841_ptp_process_tx_ts(ptp_priv);
 
-		if (status & LAN8841_PTP_INT_STS_PTP_RX_TS_INT)
-			lan8841_ptp_process_rx_ts(ptp_priv);
-
 		if (status & LAN8841_PTP_INT_STS_PTP_GPIO_CAP_INT)
 			lan8841_gpio_process_cap(ptp_priv);
 
 		if (status & LAN8841_PTP_INT_STS_PTP_TX_TS_OVRFL_INT) {
-			lan8841_ptp_flush_fifo(ptp_priv, true);
+			lan8841_ptp_flush_fifo(ptp_priv);
 			skb_queue_purge(&ptp_priv->tx_queue);
 		}
 
-		if (status & LAN8841_PTP_INT_STS_PTP_RX_TS_OVRFL_INT) {
-			lan8841_ptp_flush_fifo(ptp_priv, false);
-			skb_queue_purge(&ptp_priv->rx_queue);
-		}
-
-	} while (status);
+	} while (status & (LAN8841_PTP_INT_STS_PTP_TX_TS_INT |
+			   LAN8841_PTP_INT_STS_PTP_GPIO_CAP_INT |
+			   LAN8841_PTP_INT_STS_PTP_TX_TS_OVRFL_INT));
 }
 
 #define LAN8841_INTS_PTP		BIT(9)
@@ -3869,32 +3816,44 @@  static int lan8841_ts_info(struct mii_timestamper *mii_ts,
 #define LAN8841_PTP_INT_EN			260
 #define LAN8841_PTP_INT_EN_PTP_TX_TS_OVRFL_EN	BIT(13)
 #define LAN8841_PTP_INT_EN_PTP_TX_TS_EN		BIT(12)
-#define LAN8841_PTP_INT_EN_PTP_RX_TS_OVRFL_EN	BIT(9)
-#define LAN8841_PTP_INT_EN_PTP_RX_TS_EN		BIT(8)
 
-static void lan8841_ptp_enable_int(struct kszphy_ptp_priv *ptp_priv,
-				   bool enable)
+static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv,
+					  bool enable)
 {
 	struct phy_device *phydev = ptp_priv->phydev;
 
-	if (enable)
-		/* Enable interrupts */
+	if (enable) {
+		/* Enable interrupts on the TX side */
 		phy_modify_mmd(phydev, 2, LAN8841_PTP_INT_EN,
 			       LAN8841_PTP_INT_EN_PTP_TX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_EN,
+			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN,
 			       LAN8841_PTP_INT_EN_PTP_TX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_EN);
-	else
-		/* Disable interrupts */
+			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN);
+
+		/* Enable the modification of the frame on RX side,
+		 * this will add the ns and 2 bits of sec in the reserved field
+		 * of the PTP header
+		 */
+		phy_modify_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG,
+			       LAN8841_PTP_RX_MODE,
+			       LAN8841_PTP_INSERT_TS_EN |
+			       LAN8841_PTP_INSERT_TS_32BIT,
+			       LAN8841_PTP_INSERT_TS_EN |
+			       LAN8841_PTP_INSERT_TS_32BIT);
+	} else {
+		/* Disable interrupts on the TX side */
 		phy_modify_mmd(phydev, 2, LAN8841_PTP_INT_EN,
 			       LAN8841_PTP_INT_EN_PTP_TX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_OVRFL_EN |
-			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN |
-			       LAN8841_PTP_INT_EN_PTP_RX_TS_EN, 0);
+			       LAN8841_PTP_INT_EN_PTP_TX_TS_EN, 0);
+
+		/* Disable modification of the RX frames */
+		phy_modify_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG,
+			       LAN8841_PTP_RX_MODE,
+			       LAN8841_PTP_INSERT_TS_EN |
+			       LAN8841_PTP_INSERT_TS_32BIT, 0);
+
+		ptp_cancel_worker_sync(ptp_priv->ptp_clock);
+	}
 }
 
 #define LAN8841_PTP_RX_TIMESTAMP_EN		379
@@ -3905,7 +3864,6 @@  static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
 {
 	struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts);
 	struct phy_device *phydev = ptp_priv->phydev;
-	struct lan8814_ptp_rx_ts *rx_ts, *tmp;
 	struct hwtstamp_config config;
 	int txcfg = 0, rxcfg = 0;
 	int pkt_ts_enable;
@@ -3969,24 +3927,47 @@  static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
 				PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_ : 0);
 
 	/* Now enable/disable the timestamping */
-	lan8841_ptp_enable_int(ptp_priv,
-			       config.rx_filter != HWTSTAMP_FILTER_NONE);
-
-	/* In case of multiple starts and stops, these needs to be cleared */
-	list_for_each_entry_safe(rx_ts, tmp, &ptp_priv->rx_ts_list, list) {
-		list_del(&rx_ts->list);
-		kfree(rx_ts);
-	}
+	lan8841_ptp_enable_processing(ptp_priv,
+				      config.rx_filter != HWTSTAMP_FILTER_NONE);
 
 	skb_queue_purge(&ptp_priv->rx_queue);
 	skb_queue_purge(&ptp_priv->tx_queue);
 
-	lan8841_ptp_flush_fifo(ptp_priv, false);
-	lan8841_ptp_flush_fifo(ptp_priv, true);
+	lan8841_ptp_flush_fifo(ptp_priv);
 
 	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0;
 }
 
+#define LAN8841_SKB_CB(skb)	((struct lan8841_skb_cb *)(skb)->cb)
+
+struct lan8841_skb_cb {
+	struct ptp_header *header;
+};
+
+static bool lan8841_rxtstamp(struct mii_timestamper *mii_ts,
+			     struct sk_buff *skb, int type)
+{
+	struct kszphy_ptp_priv *ptp_priv =
+			container_of(mii_ts, struct kszphy_ptp_priv, mii_ts);
+	struct ptp_header *header = ptp_parse_header(skb, type);
+
+	if (!header)
+		return false;
+
+	if (ptp_priv->rx_filter == HWTSTAMP_FILTER_NONE ||
+	    type == PTP_CLASS_NONE)
+		return false;
+
+	if ((type & ptp_priv->version) == 0 || (type & ptp_priv->layer) == 0)
+		return false;
+
+	LAN8841_SKB_CB(skb)->header = header;
+	skb_queue_tail(&ptp_priv->rx_queue, skb);
+	ptp_schedule_worker(ptp_priv->ptp_clock, 0);
+
+	return true;
+}
+
 #define LAN8841_EVENT_A		0
 #define LAN8841_EVENT_B		1
 #define LAN8841_PTP_LTC_TARGET_SEC_HI(event)	((event) == LAN8841_EVENT_A ? 278 : 288)
@@ -4127,6 +4108,30 @@  static int lan8841_ptp_gettime64(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+static void lan8841_ptp_getseconds(struct ptp_clock_info *ptp,
+				   struct timespec64 *ts)
+{
+	struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
+							ptp_clock_info);
+	struct phy_device *phydev = ptp_priv->phydev;
+	time64_t s;
+
+	mutex_lock(&ptp_priv->ptp_lock);
+	/* Issue the command to read the LTC */
+	phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
+		      LAN8841_PTP_CMD_CTL_PTP_LTC_READ);
+
+	/* Read the LTC */
+	s = phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_HI);
+	s <<= 16;
+	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_MID);
+	s <<= 16;
+	s |= phy_read_mmd(phydev, 2, LAN8841_PTP_LTC_RD_SEC_LO);
+	mutex_unlock(&ptp_priv->ptp_lock);
+
+	set_normalized_timespec64(ts, s, 0);
+}
+
 #define LAN8841_PTP_LTC_STEP_ADJ_LO			276
 #define LAN8841_PTP_LTC_STEP_ADJ_HI			275
 #define LAN8841_PTP_LTC_STEP_ADJ_DIR			BIT(15)
@@ -4629,6 +4634,37 @@  static int lan8841_ptp_enable(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+static long lan8841_ptp_do_aux_work(struct ptp_clock_info *ptp)
+{
+	struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
+							ptp_clock_info);
+	struct skb_shared_hwtstamps *shhwtstamps;
+	struct timespec64 ts;
+	struct sk_buff *skb;
+	u32 ts_header;
+
+	while ((skb = skb_dequeue(&ptp_priv->rx_queue)) != NULL) {
+		lan8841_ptp_getseconds(ptp, &ts);
+		ts_header = __be32_to_cpu(LAN8841_SKB_CB(skb)->header->reserved2);
+
+		shhwtstamps = skb_hwtstamps(skb);
+		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+
+		/* Check for any wrap arounds for the second part */
+		if ((ts.tv_sec & GENMASK(1, 0)) < ts_header >> 30)
+			ts.tv_sec -= GENMASK(1, 0) + 1;
+
+		shhwtstamps->hwtstamp =
+			ktime_set((ts.tv_sec & ~(GENMASK(1, 0))) | ts_header >> 30,
+				  ts_header & GENMASK(29, 0));
+		LAN8841_SKB_CB(skb)->header->reserved2 = 0;
+
+		netif_rx(skb);
+	}
+
+	return -1;
+}
+
 static struct ptp_clock_info lan8841_ptp_clock_info = {
 	.owner		= THIS_MODULE,
 	.name		= "lan8841 ptp",
@@ -4639,6 +4675,7 @@  static struct ptp_clock_info lan8841_ptp_clock_info = {
 	.adjfine	= lan8841_ptp_adjfine,
 	.verify         = lan8841_ptp_verify,
 	.enable         = lan8841_ptp_enable,
+	.do_aux_work	= lan8841_ptp_do_aux_work,
 	.n_per_out      = LAN8841_PTP_GPIO_NUM,
 	.n_ext_ts       = LAN8841_PTP_GPIO_NUM,
 	.n_pins         = LAN8841_PTP_GPIO_NUM,
@@ -4705,7 +4742,7 @@  static int lan8841_probe(struct phy_device *phydev)
 	ptp_priv->phydev = phydev;
 	mutex_init(&ptp_priv->ptp_lock);
 
-	ptp_priv->mii_ts.rxtstamp = lan8814_rxtstamp;
+	ptp_priv->mii_ts.rxtstamp = lan8841_rxtstamp;
 	ptp_priv->mii_ts.txtstamp = lan8814_txtstamp;
 	ptp_priv->mii_ts.hwtstamp = lan8841_hwtstamp;
 	ptp_priv->mii_ts.ts_info = lan8841_ts_info;
@@ -4715,6 +4752,16 @@  static int lan8841_probe(struct phy_device *phydev)
 	return 0;
 }
 
+static int lan8841_suspend(struct phy_device *phydev)
+{
+	struct kszphy_priv *priv = phydev->priv;
+	struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv;
+
+	ptp_cancel_worker_sync(ptp_priv->ptp_clock);
+
+	return genphy_suspend(phydev);
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -4938,7 +4985,7 @@  static struct phy_driver ksphy_driver[] = {
 	.get_sset_count = kszphy_get_sset_count,
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
-	.suspend	= genphy_suspend,
+	.suspend	= lan8841_suspend,
 	.resume		= genphy_resume,
 	.cable_test_start	= lan8814_cable_test_start,
 	.cable_test_get_status	= ksz886x_cable_test_get_status,