diff mbox series

[net-next,v3,5/6] ptp: Speed up vclock lookup

Message ID 20220501111836.10910-6-gerhard@engleder-embedded.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series ptp: Support hardware clocks with additional free running cycle counter | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Gerhard Engleder May 1, 2022, 11:18 a.m. UTC
ptp_convert_timestamp() is called in the RX path of network messages.
The current implementation takes ~5000ns on 1.2GHz A53. This is too much
for the hot path of packet processing.

Introduce hash table for fast vclock lookup in ptp_convert_timestamp().
The execution time of ptp_convert_timestamp() is reduced to ~700ns on
1.2GHz A53.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
 drivers/ptp/ptp_private.h |  1 +
 drivers/ptp/ptp_vclock.c  | 66 ++++++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 19 deletions(-)

Comments

Richard Cochran May 5, 2022, 2:07 p.m. UTC | #1
On Sun, May 01, 2022 at 01:18:35PM +0200, Gerhard Engleder wrote:
> ptp_convert_timestamp() is called in the RX path of network messages.
> The current implementation takes ~5000ns on 1.2GHz A53. This is too much
> for the hot path of packet processing.
> 
> Introduce hash table for fast vclock lookup in ptp_convert_timestamp().
> The execution time of ptp_convert_timestamp() is reduced to ~700ns on
> 1.2GHz A53.

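50 times faster... nice improvement! [Editorial note: the figures quoted above, ~5000ns down to ~700ns, correspond to roughly a 7x speedup.]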
 
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>

Acked-by: Richard Cochran <richardcochran@gmail.com>
diff mbox series

Patch

diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index ab47c10b3874..77918a2c6701 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -63,6 +63,7 @@  struct ptp_vclock {
 	struct ptp_clock *pclock;
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
+	struct hlist_node vclock_hash_node;
 	struct cyclecounter cc;
 	struct timecounter tc;
 	spinlock_t lock;	/* protects tc/cc */
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index c30bcce2bb43..1c0ed4805c0a 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -5,6 +5,7 @@ 
  * Copyright 2021 NXP
  */
 #include <linux/slab.h>
+#include <linux/hashtable.h>
 #include "ptp_private.h"
 
 #define PTP_VCLOCK_CC_SHIFT		31
@@ -13,6 +14,32 @@ 
 #define PTP_VCLOCK_FADJ_DENOMINATOR	15625ULL
 #define PTP_VCLOCK_REFRESH_INTERVAL	(HZ * 2)
 
+/* protects vclock_hash addition/deletion */
+static DEFINE_SPINLOCK(vclock_hash_lock);
+
+static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8);
+
+static void ptp_vclock_hash_add(struct ptp_vclock *vclock)
+{
+	spin_lock(&vclock_hash_lock);
+
+	hlist_add_head_rcu(&vclock->vclock_hash_node,
+			   &vclock_hash[vclock->clock->index % HASH_SIZE(vclock_hash)]);
+
+	spin_unlock(&vclock_hash_lock);
+}
+
+static void ptp_vclock_hash_del(struct ptp_vclock *vclock)
+{
+	spin_lock(&vclock_hash_lock);
+
+	hlist_del_init_rcu(&vclock->vclock_hash_node);
+
+	spin_unlock(&vclock_hash_lock);
+
+	synchronize_rcu();
+}
+
 static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
 	struct ptp_vclock *vclock = info_to_vclock(ptp);
@@ -176,6 +203,8 @@  struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock)
 	snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt",
 		 pclock->index);
 
+	INIT_HLIST_NODE(&vclock->vclock_hash_node);
+
 	spin_lock_init(&vclock->lock);
 
 	vclock->clock = ptp_clock_register(&vclock->info, &pclock->dev);
@@ -187,11 +216,15 @@  struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock)
 	timecounter_init(&vclock->tc, &vclock->cc, 0);
 	ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL);
 
+	ptp_vclock_hash_add(vclock);
+
 	return vclock;
 }
 
 void ptp_vclock_unregister(struct ptp_vclock *vclock)
 {
+	ptp_vclock_hash_del(vclock);
+
 	ptp_clock_unregister(vclock->clock);
 	kfree(vclock);
 }
@@ -234,34 +267,29 @@  EXPORT_SYMBOL(ptp_get_vclocks_index);
 
 ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index)
 {
-	char name[PTP_CLOCK_NAME_LEN] = "";
+	unsigned int hash = vclock_index % HASH_SIZE(vclock_hash);
 	struct ptp_vclock *vclock;
-	struct ptp_clock *ptp;
 	unsigned long flags;
-	struct device *dev;
 	u64 ns;
+	u64 vclock_ns = 0;
 
-	snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", vclock_index);
-	dev = class_find_device_by_name(ptp_class, name);
-	if (!dev)
-		return 0;
+	ns = ktime_to_ns(*hwtstamp);
 
-	ptp = dev_get_drvdata(dev);
-	if (!ptp->is_virtual_clock) {
-		put_device(dev);
-		return 0;
-	}
+	rcu_read_lock();
 
-	vclock = info_to_vclock(ptp->info);
+	hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) {
+		if (vclock->clock->index != vclock_index)
+			continue;
 
-	ns = ktime_to_ns(*hwtstamp);
+		spin_lock_irqsave(&vclock->lock, flags);
+		vclock_ns = timecounter_cyc2time(&vclock->tc, ns);
+		spin_unlock_irqrestore(&vclock->lock, flags);
+		break;
+	}
 
-	spin_lock_irqsave(&vclock->lock, flags);
-	ns = timecounter_cyc2time(&vclock->tc, ns);
-	spin_unlock_irqrestore(&vclock->lock, flags);
+	rcu_read_unlock();
 
-	put_device(dev);
-	return ns_to_ktime(ns);
+	return ns_to_ktime(vclock_ns);
 }
 EXPORT_SYMBOL(ptp_convert_timestamp);
 #endif