diff mbox series

[v1] ptp: add ToD device driver for Intel FPGA cards

Message ID 20230313030239.886816-1-tianfei.zhang@intel.com (mailing list archive)
State New
Headers show
Series [v1] ptp: add ToD device driver for Intel FPGA cards | expand

Commit Message

Zhang, Tianfei March 13, 2023, 3:02 a.m. UTC
Adding a DFL (Device Feature List) device driver of ToD device for
Intel FPGA cards.

The Intel FPGA Time of Day(ToD) IP within the FPGA DFL bus is exposed
as PTP Hardware clock(PHC) device to the Linux PTP stack to synchronize
the system clock to its ToD information using phc2sys utility of the
Linux PTP stack. The DFL is a hardware List within FPGA, which defines
a linked list of feature headers within the device MMIO space to provide
an extensible way of adding subdevice features.

Signed-off-by: Raghavendra Khadatare <raghavendrax.anand.khadatare@intel.com>
Signed-off-by: Tianfei Zhang <tianfei.zhang@intel.com>
---
 MAINTAINERS               |   7 +
 drivers/ptp/Kconfig       |  13 ++
 drivers/ptp/Makefile      |   1 +
 drivers/ptp/ptp_dfl_tod.c | 334 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 355 insertions(+)
 create mode 100644 drivers/ptp/ptp_dfl_tod.c


base-commit: eeac8ede17557680855031c6f305ece2378af326

Comments

Marco Pagani March 13, 2023, 5:50 p.m. UTC | #1
On 2023-03-13 04:02, Tianfei Zhang wrote:
> Adding a DFL (Device Feature List) device driver of ToD device for
> Intel FPGA cards.
> 
> The Intel FPGA Time of Day(ToD) IP within the FPGA DFL bus is exposed
> as PTP Hardware clock(PHC) device to the Linux PTP stack to synchronize
> the system clock to its ToD information using phc2sys utility of the
> Linux PTP stack. The DFL is a hardware List within FPGA, which defines
> a linked list of feature headers within the device MMIO space to provide
> an extensible way of adding subdevice features.
> 
> Signed-off-by: Raghavendra Khadatare <raghavendrax.anand.khadatare@intel.com>
> Signed-off-by: Tianfei Zhang <tianfei.zhang@intel.com>
> ---
>  MAINTAINERS               |   7 +
>  drivers/ptp/Kconfig       |  13 ++
>  drivers/ptp/Makefile      |   1 +
>  drivers/ptp/ptp_dfl_tod.c | 334 ++++++++++++++++++++++++++++++++++++++
>  4 files changed, 355 insertions(+)
>  create mode 100644 drivers/ptp/ptp_dfl_tod.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ec57c42ed544..584979abbd92 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15623,6 +15623,13 @@ L:	netdev@vger.kernel.org
>  S:	Maintained
>  F:	drivers/ptp/ptp_ocp.c
>  
> +INTEL PTP DFL ToD DRIVER
> +M:	Tianfei Zhang <tianfei.zhang@intel.com>
> +L:	linux-fpga@vger.kernel.org
> +L:	netdev@vger.kernel.org
> +S:	Maintained
> +F:	drivers/ptp/ptp_dfl_tod.c
> +
>  OPENCORES I2C BUS DRIVER
>  M:	Peter Korsgaard <peter@korsgaard.com>
>  M:	Andrew Lunn <andrew@lunn.ch>
> diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
> index fe4971b65c64..e0d6f136ee46 100644
> --- a/drivers/ptp/Kconfig
> +++ b/drivers/ptp/Kconfig
> @@ -186,4 +186,17 @@ config PTP_1588_CLOCK_OCP
>  
>  	  More information is available at http://www.timingcard.com/
>  
> +config PTP_DFL_TOD
> +	tristate "FPGA DFL ToD Driver"
> +	depends on FPGA_DFL
> +	help
> +	  The DFL (Device Feature List) device driver for the Intel ToD
> +	  (Time-of-Day) device in FPGA card. The ToD IP within the FPGA
> +	  is exposed as PTP Hardware Clock (PHC) device to the Linux PTP
> +	  stack to synchronize the system clock to its ToD information
> +	  using phc2sys utility of the Linux PTP stack.
> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called ptp_dfl_tod.
> +
>  endmenu
> diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
> index 28a6fe342d3e..553f18bf3c83 100644
> --- a/drivers/ptp/Makefile
> +++ b/drivers/ptp/Makefile
> @@ -18,3 +18,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_IDTCM)	+= ptp_clockmatrix.o
>  obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33)	+= ptp_idt82p33.o
>  obj-$(CONFIG_PTP_1588_CLOCK_VMW)	+= ptp_vmw.o
>  obj-$(CONFIG_PTP_1588_CLOCK_OCP)	+= ptp_ocp.o
> +obj-$(CONFIG_PTP_DFL_TOD)		+= ptp_dfl_tod.o
> diff --git a/drivers/ptp/ptp_dfl_tod.c b/drivers/ptp/ptp_dfl_tod.c
> new file mode 100644
> index 000000000000..d9fbdfc53bd8
> --- /dev/null
> +++ b/drivers/ptp/ptp_dfl_tod.c
> @@ -0,0 +1,334 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * DFL device driver for Time-of-Day (ToD) private feature
> + *
> + * Copyright (C) 2023 Intel Corporation
> + */
> +
> +#include <linux/bitfield.h>
> +#include <linux/delay.h>
> +#include <linux/dfl.h>
> +#include <linux/gcd.h>
> +#include <linux/iopoll.h>
> +#include <linux/module.h>
> +#include <linux/ptp_clock_kernel.h>
> +#include <linux/spinlock.h>
> +#include <linux/units.h>
> +
> +#define FME_FEATURE_ID_TOD		0x22
> +
> +/* ToD clock register space. */
> +#define TOD_CLK_FREQ			0x038
> +
> +/*
> + * The read sequence of ToD timestamp registers: TOD_NANOSEC, TOD_SECONDSL and
> + * TOD_SECONDSH, because there is a hardware snapshot whenever the TOD_NANOSEC
> + * register is read.
> + *
> + * The ToD IP requires writing registers in the reverse order to the read sequence.
> + * The timestamp is corrected when the TOD_NANOSEC register is written, so the
> + * sequence of write TOD registers: TOD_SECONDSH, TOD_SECONDSL and TOD_NANOSEC.
> + */
> +#define TOD_SECONDSH			0x100
> +#define TOD_SECONDSL			0x104
> +#define TOD_NANOSEC			0x108
> +#define TOD_PERIOD			0x110
> +#define TOD_ADJUST_PERIOD		0x114
> +#define TOD_ADJUST_COUNT		0x118
> +#define TOD_DRIFT_ADJUST		0x11c
> +#define TOD_DRIFT_ADJUST_RATE		0x120
> +#define PERIOD_FRAC_OFFSET		16
> +#define SECONDS_MSB			GENMASK_ULL(47, 32)
> +#define SECONDS_LSB			GENMASK_ULL(31, 0)
> +#define TOD_SECONDSH_SEC_MSB		GENMASK_ULL(15, 0)
> +
> +#define CAL_SECONDS(m, l)		((FIELD_GET(TOD_SECONDSH_SEC_MSB, (m)) << 32) | (l))
> +
> +#define TOD_PERIOD_MASK		GENMASK_ULL(19, 0)
> +#define TOD_PERIOD_MAX			FIELD_MAX(TOD_PERIOD_MASK)
> +#define TOD_PERIOD_MIN			0
> +#define TOD_DRIFT_ADJUST_MASK		GENMASK_ULL(15, 0)
> +#define TOD_DRIFT_ADJUST_FNS_MAX	FIELD_MAX(TOD_DRIFT_ADJUST_MASK)
> +#define TOD_DRIFT_ADJUST_RATE_MAX	TOD_DRIFT_ADJUST_FNS_MAX
> +#define TOD_ADJUST_COUNT_MASK		GENMASK_ULL(19, 0)
> +#define TOD_ADJUST_COUNT_MAX		FIELD_MAX(TOD_ADJUST_COUNT_MASK)
> +#define TOD_ADJUST_INTERVAL_US		1000
> +#define TOD_ADJUST_MS			\
> +		(((TOD_PERIOD_MAX >> 16) + 1) * (TOD_ADJUST_COUNT_MAX + 1))
> +#define TOD_ADJUST_MS_MAX		(TOD_ADJUST_MS / MICRO)
> +#define TOD_ADJUST_MAX_US		(TOD_ADJUST_MS_MAX * USEC_PER_MSEC)
> +#define TOD_MAX_ADJ			(500 * MEGA)
> +
> +struct dfl_tod {
> +	struct ptp_clock_info ptp_clock_ops;
> +	struct device *dev;
> +	struct ptp_clock *ptp_clock;
> +
> +	/* ToD Clock address space */
> +	void __iomem *tod_ctrl;
> +
> +	/* ToD clock registers protection */
> +	spinlock_t tod_lock;
> +};
> +
> +/*
> + * A fine ToD HW clock offset adjustment. To perform the fine offset adjustment, the
> + * adjust_period and adjust_count argument are used to update the TOD_ADJUST_PERIOD
> + * and TOD_ADJUST_COUNT register for in hardware. The dt->tod_lock spinlock must be
> + * held when calling this function.
> + */

Calling this function while holding the dt->tod_lock spinlock might cause an error
since read_poll_timeout() can sleep. If it is required to use a spinlock, there is
the readl_poll_timeout_atomic() function, which can be called from atomic context.
However, in this case, it is probably better to use a mutex instead of a spinlock
since the delay appears to be 1 ms.

> +static int fine_adjust_tod_clock(struct dfl_tod *dt, u32 adjust_period,
> +				 u32 adjust_count)
> +{
> +	void __iomem *base = dt->tod_ctrl;
> +	u32 val;
> +
> +	writel(adjust_period, base + TOD_ADJUST_PERIOD);
> +	writel(adjust_count, base + TOD_ADJUST_COUNT);
> +
> +	/* Wait for present offset adjustment update to complete */
> +	return readl_poll_timeout(base + TOD_ADJUST_COUNT, val, !val, TOD_ADJUST_INTERVAL_US,
> +				  TOD_ADJUST_MAX_US);
> +}
> +
> +/*
> + * A coarse ToD HW clock offset adjustment.
> + * The coarse time adjustment performs by adding or subtracting the delta value
> + * from the current ToD HW clock time.
> + */
> +static int coarse_adjust_tod_clock(struct dfl_tod *dt, s64 delta)
> +{
> +	u32 seconds_msb, seconds_lsb, nanosec;
> +	void __iomem *base = dt->tod_ctrl;
> +	u64 seconds, now;
> +
> +	if (delta == 0)
> +		return 0;
> +
> +	nanosec = readl(base + TOD_NANOSEC);
> +	seconds_lsb = readl(base + TOD_SECONDSL);
> +	seconds_msb = readl(base + TOD_SECONDSH);
> +
> +	/* Calculate new time */
> +	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
> +	now = seconds * NSEC_PER_SEC + nanosec + delta;
> +
> +	seconds = div_u64_rem(now, NSEC_PER_SEC, &nanosec);
> +	seconds_msb = FIELD_GET(SECONDS_MSB, seconds);
> +	seconds_lsb = FIELD_GET(SECONDS_LSB, seconds);
> +
> +	writel(seconds_msb, base + TOD_SECONDSH);
> +	writel(seconds_lsb, base + TOD_SECONDSL);
> +	writel(nanosec, base + TOD_NANOSEC);
> +
> +	return 0;
> +}
> +
> +static int dfl_tod_adjust_fine(struct ptp_clock_info *ptp, long scaled_ppm)
> +{
> +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> +	u32 tod_period, tod_rem, tod_drift_adjust_fns, tod_drift_adjust_rate;
> +	void __iomem *base = dt->tod_ctrl;
> +	unsigned long flags, rate;
> +	u64 ppb;
> +
> +	/* Get the clock rate from clock frequency register offset */
> +	rate = readl(base + TOD_CLK_FREQ);
> +
> +	/* add GIGA as nominal ppb */
> +	ppb = scaled_ppm_to_ppb(scaled_ppm) + GIGA;
> +
> +	tod_period = div_u64_rem(ppb << PERIOD_FRAC_OFFSET, rate, &tod_rem);
> +	if (tod_period > TOD_PERIOD_MAX)
> +		return -ERANGE;
> +
> +	/*
> +	 * The drift of ToD adjusted periodically by adding a drift_adjust_fns
> +	 * correction value every drift_adjust_rate count of clock cycles.
> +	 */
> +	tod_drift_adjust_fns = tod_rem / gcd(tod_rem, rate);
> +	tod_drift_adjust_rate = rate / gcd(tod_rem, rate);
> +
> +	while ((tod_drift_adjust_fns > TOD_DRIFT_ADJUST_FNS_MAX) ||
> +	       (tod_drift_adjust_rate > TOD_DRIFT_ADJUST_RATE_MAX)) {
> +		tod_drift_adjust_fns >>= 1;
> +		tod_drift_adjust_rate >>= 1;
> +	}

Why both tod_drift_adjust_fns and tod_drift_adjust_rate are iteratively divided
if one of the two exceeds the maximum value? Wouldn't it be more accurate to set
each of them to the max if they exceeded their respective maximum value?

> +
> +	if (tod_drift_adjust_fns == 0)
> +		tod_drift_adjust_rate = 0;
> +
> +	spin_lock_irqsave(&dt->tod_lock, flags);
> +	writel(tod_period, base + TOD_PERIOD);
> +	writel(0, base + TOD_ADJUST_PERIOD);
> +	writel(0, base + TOD_ADJUST_COUNT);
> +	writel(tod_drift_adjust_fns, base + TOD_DRIFT_ADJUST);
> +	writel(tod_drift_adjust_rate, base + TOD_DRIFT_ADJUST_RATE);
> +	spin_unlock_irqrestore(&dt->tod_lock, flags);
> +
> +	return 0;
> +}
> +
> +static int dfl_tod_adjust_time(struct ptp_clock_info *ptp, s64 delta)
> +{
> +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> +	u32 period, diff, rem, rem_period, adj_period;
> +	void __iomem *base = dt->tod_ctrl;
> +	unsigned long flags;
> +	bool neg_adj;
> +	u64 count;
> +	int ret;
> +
> +	neg_adj = delta < 0;
> +	if (neg_adj)
> +		delta = -delta;
> +
> +	spin_lock_irqsave(&dt->tod_lock, flags);
> +
> +	/*
> +	 * Get the maximum possible value of the Period register offset
> +	 * adjustment in nanoseconds scale. This depends on the current
> +	 * Period register setting and the maximum and minimum possible
> +	 * values of the Period register.
> +	 */
> +	period = readl(base + TOD_PERIOD);
> +
> +	if (neg_adj) {
> +		diff = (period - TOD_PERIOD_MIN) >> PERIOD_FRAC_OFFSET;
> +		adj_period = period - (diff << PERIOD_FRAC_OFFSET);
> +		rem_period = period - (rem << PERIOD_FRAC_OFFSET);

rem seems to be uninitialized here.

> +	} else {
> +		diff = (TOD_PERIOD_MAX - period) >> PERIOD_FRAC_OFFSET;
> +		adj_period = period + (diff << PERIOD_FRAC_OFFSET);
> +		rem_period = period + (rem << PERIOD_FRAC_OFFSET);
> +	}
> +
> +	ret = 0;
> +
> +	/* Find the number of cycles required for the time adjustment */
> +	count = div_u64_rem(delta, diff, &rem);
> +
> +	if (count > TOD_ADJUST_COUNT_MAX) {
> +		ret = coarse_adjust_tod_clock(dt, delta);
> +	} else {
> +		/* Adjust the period by count cycles to adjust the time */
> +		if (count)
> +			ret = fine_adjust_tod_clock(dt, adj_period, count);
> +
> +		/* If there is a remainder, adjust the period for an additional cycle */
> +		if (rem)
> +			ret = fine_adjust_tod_clock(dt, rem_period, 1);
> +	}
> +
> +	spin_unlock_irqrestore(&dt->tod_lock, flags);
> +
> +	return ret;
> +}
> +
> +static int dfl_tod_get_timex(struct ptp_clock_info *ptp, struct timespec64 *ts,
> +			     struct ptp_system_timestamp *sts)
> +{
> +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> +	u32 seconds_msb, seconds_lsb, nanosec;
> +	void __iomem *base = dt->tod_ctrl;
> +	unsigned long flags;
> +	u64 seconds;
> +
> +	spin_lock_irqsave(&dt->tod_lock, flags);
> +	ptp_read_system_prets(sts);
> +	nanosec = readl(base + TOD_NANOSEC);
> +	seconds_lsb = readl(base + TOD_SECONDSL);
> +	seconds_msb = readl(base + TOD_SECONDSH);
> +	ptp_read_system_postts(sts);
> +	spin_unlock_irqrestore(&dt->tod_lock, flags);
> +
> +	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
> +
> +	ts->tv_nsec = nanosec;
> +	ts->tv_sec = seconds;
> +
> +	return 0;
> +}
> +
> +static int dfl_tod_set_time(struct ptp_clock_info *ptp,
> +			    const struct timespec64 *ts)
> +{
> +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> +	u32 seconds_msb = FIELD_GET(SECONDS_MSB, ts->tv_sec);
> +	u32 seconds_lsb = FIELD_GET(SECONDS_LSB, ts->tv_sec);
> +	u32 nanosec = FIELD_GET(SECONDS_LSB, ts->tv_nsec);
> +	void __iomem *base = dt->tod_ctrl;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&dt->tod_lock, flags);
> +	writel(seconds_msb, base + TOD_SECONDSH);
> +	writel(seconds_lsb, base + TOD_SECONDSL);
> +	writel(nanosec, base + TOD_NANOSEC);
> +	spin_unlock_irqrestore(&dt->tod_lock, flags);
> +
> +	return 0;
> +}
> +
> +static struct ptp_clock_info dfl_tod_clock_ops = {
> +	.owner = THIS_MODULE,
> +	.name = "dfl_tod",
> +	.max_adj = TOD_MAX_ADJ,
> +	.adjfine = dfl_tod_adjust_fine,
> +	.adjtime = dfl_tod_adjust_time,
> +	.gettimex64 = dfl_tod_get_timex,
> +	.settime64 = dfl_tod_set_time,
> +};
> +
> +static int dfl_tod_probe(struct dfl_device *ddev)
> +{
> +	struct device *dev = &ddev->dev;
> +	struct dfl_tod *dt;
> +
> +	dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
> +	if (!dt)
> +		return -ENOMEM;
> +
> +	dt->tod_ctrl = devm_ioremap_resource(dev, &ddev->mmio_res);
> +	if (IS_ERR(dt->tod_ctrl))
> +		return PTR_ERR(dt->tod_ctrl);
> +
> +	dt->dev = dev;
> +	spin_lock_init(&dt->tod_lock);
> +	dev_set_drvdata(dev, dt);
> +
> +	dt->ptp_clock_ops = dfl_tod_clock_ops;
> +
> +	dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
> +	if (IS_ERR(dt->ptp_clock))
> +		return dev_err_probe(dt->dev, PTR_ERR(dt->ptp_clock),
> +				     "Unable to register PTP clock\n");
> +
> +	return 0;
> +}
> +
> +static void dfl_tod_remove(struct dfl_device *ddev)
> +{
> +	struct dfl_tod *dt = dev_get_drvdata(&ddev->dev);
> +
> +	ptp_clock_unregister(dt->ptp_clock);
> +}
> +
> +static const struct dfl_device_id dfl_tod_ids[] = {
> +	{ FME_ID, FME_FEATURE_ID_TOD },
> +	{ }
> +};
> +MODULE_DEVICE_TABLE(dfl, dfl_tod_ids);
> +
> +static struct dfl_driver dfl_tod_driver = {
> +	.drv = {
> +		.name = "dfl-tod",
> +	},
> +	.id_table = dfl_tod_ids,
> +	.probe = dfl_tod_probe,
> +	.remove = dfl_tod_remove,
> +};
> +module_dfl_driver(dfl_tod_driver);
> +
> +MODULE_DESCRIPTION("FPGA DFL ToD driver");
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_LICENSE("GPL");
> 
> base-commit: eeac8ede17557680855031c6f305ece2378af326

Thanks,
Marco
Richard Cochran March 13, 2023, 6:49 p.m. UTC | #2
On Sun, Mar 12, 2023 at 11:02:39PM -0400, Tianfei Zhang wrote:


> +static int dfl_tod_probe(struct dfl_device *ddev)
> +{
> +	struct device *dev = &ddev->dev;
> +	struct dfl_tod *dt;
> +
> +	dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
> +	if (!dt)
> +		return -ENOMEM;
> +
> +	dt->tod_ctrl = devm_ioremap_resource(dev, &ddev->mmio_res);
> +	if (IS_ERR(dt->tod_ctrl))
> +		return PTR_ERR(dt->tod_ctrl);
> +
> +	dt->dev = dev;
> +	spin_lock_init(&dt->tod_lock);
> +	dev_set_drvdata(dev, dt);
> +
> +	dt->ptp_clock_ops = dfl_tod_clock_ops;
> +
> +	dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
> +	if (IS_ERR(dt->ptp_clock))
> +		return dev_err_probe(dt->dev, PTR_ERR(dt->ptp_clock),
> +				     "Unable to register PTP clock\n");

Need to handle NULL as well...

/**
 * ptp_clock_register() - register a PTP hardware clock driver
 *
 * @info:   Structure describing the new clock.
 * @parent: Pointer to the parent device of the new clock.
 *
 * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
 * support is missing at the configuration level, this function
 * returns NULL, and drivers are expected to gracefully handle that
 * case separately.
 */

Thanks,
Richard
Zhang, Tianfei March 14, 2023, 7:16 a.m. UTC | #3
> -----Original Message-----
> From: Richard Cochran <richardcochran@gmail.com>
> Sent: Tuesday, March 14, 2023 2:50 AM
> To: Zhang, Tianfei <tianfei.zhang@intel.com>
> Cc: netdev@vger.kernel.org; linux-fpga@vger.kernel.org;
> ilpo.jarvinen@linux.intel.com; andriy.shevchenko@linux.intel.com; Weight, Russell H
> <russell.h.weight@intel.com>; matthew.gerlach@linux.intel.com; pierre-
> louis.bossart@linux.intel.com; Gomes, Vinicius <vinicius.gomes@intel.com>;
> Khadatare, RaghavendraX Anand <raghavendrax.anand.khadatare@intel.com>
> Subject: Re: [PATCH v1] ptp: add ToD device driver for Intel FPGA cards
> 
> On Sun, Mar 12, 2023 at 11:02:39PM -0400, Tianfei Zhang wrote:
> 
> 
> > +static int dfl_tod_probe(struct dfl_device *ddev) {
> > +	struct device *dev = &ddev->dev;
> > +	struct dfl_tod *dt;
> > +
> > +	dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
> > +	if (!dt)
> > +		return -ENOMEM;
> > +
> > +	dt->tod_ctrl = devm_ioremap_resource(dev, &ddev->mmio_res);
> > +	if (IS_ERR(dt->tod_ctrl))
> > +		return PTR_ERR(dt->tod_ctrl);
> > +
> > +	dt->dev = dev;
> > +	spin_lock_init(&dt->tod_lock);
> > +	dev_set_drvdata(dev, dt);
> > +
> > +	dt->ptp_clock_ops = dfl_tod_clock_ops;
> > +
> > +	dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
> > +	if (IS_ERR(dt->ptp_clock))
> > +		return dev_err_probe(dt->dev, PTR_ERR(dt->ptp_clock),
> > +				     "Unable to register PTP clock\n");
> 
> Need to handle NULL as well...

It looks like that it doesn't need check NULL for ptp_clock_register(), it handle the NULL case internally and return ERR_PTR(-ENOMEM).

struct ptp_clock *ptp_clock_register()
{
             err = -ENOMEM;
              ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
	if (ptp == NULL)
		goto no_memory;

              ...
           
            return ptp;

no_memory:
	return ERR_PTR(err);
}
Andy Shevchenko March 14, 2023, 10:47 a.m. UTC | #4
On Mon, Mar 13, 2023 at 11:49:53AM -0700, Richard Cochran wrote:
> On Sun, Mar 12, 2023 at 11:02:39PM -0400, Tianfei Zhang wrote:

...

> > +	dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
> > +	if (IS_ERR(dt->ptp_clock))
> > +		return dev_err_probe(dt->dev, PTR_ERR(dt->ptp_clock),
> > +				     "Unable to register PTP clock\n");
> 
> Need to handle NULL as well...
> 
> /**
>  * ptp_clock_register() - register a PTP hardware clock driver
>  *
>  * @info:   Structure describing the new clock.
>  * @parent: Pointer to the parent device of the new clock.
>  *
>  * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
>  * support is missing at the configuration level, this function
>  * returns NULL, and drivers are expected to gracefully handle that
>  * case separately.
>  */

I'm wondering why.

The semantics of the above is similar to gpiod_get_optional() and since NULL
is a valid return in such cases, the PTP has to handle this transparently to
the user. Otherwise it's badly designed API which has to be fixed.

TL;DR: If I'm mistaken, I would like to know why.
Richard Cochran March 14, 2023, 7:46 p.m. UTC | #5
On Tue, Mar 14, 2023 at 12:47:03PM +0200, Andy Shevchenko wrote:
> The semantics of the above is similar to gpiod_get_optional() and since NULL
> is a valid return in such cases, the PTP has to handle this transparently to
> the user. Otherwise it's badly designed API which has to be fixed.

Does it now?  Whatever.

> TL;DR: If I'm mistaken, I would like to know why.

git log.  git blame.

Get to know the tools of trade.

Thanks,
Richard
Richard Cochran March 14, 2023, 8:01 p.m. UTC | #6
On Tue, Mar 14, 2023 at 07:16:28AM +0000, Zhang, Tianfei wrote:

> > Need to handle NULL as well...
> 
> It looks like that it doesn't need check NULL for ptp_clock_register(), it handle the NULL case internally and return ERR_PTR(-ENOMEM).

You aren't looking in the right place.

Go back and READ the KernelDoc that I posted and you cut from your reply.

Thanks,
Richard
Zhang, Tianfei March 15, 2023, 2:59 a.m. UTC | #7
> -----Original Message-----
> From: Marco Pagani <marpagan@redhat.com>
> Sent: Tuesday, March 14, 2023 1:50 AM
> To: Zhang, Tianfei <tianfei.zhang@intel.com>
> Cc: ilpo.jarvinen@linux.intel.com; andriy.shevchenko@linux.intel.com; Weight,
> Russell H <russell.h.weight@intel.com>; matthew.gerlach@linux.intel.com; pierre-
> louis.bossart@linux.intel.com; Gomes, Vinicius <vinicius.gomes@intel.com>;
> Khadatare, RaghavendraX Anand <raghavendrax.anand.khadatare@intel.com>;
> netdev@vger.kernel.org; linux-fpga@vger.kernel.org; richardcochran@gmail.com
> Subject: Re: [PATCH v1] ptp: add ToD device driver for Intel FPGA cards
> 
> 
> 
> On 2023-03-13 04:02, Tianfei Zhang wrote:
> > Adding a DFL (Device Feature List) device driver of ToD device for
> > Intel FPGA cards.
> >
> > The Intel FPGA Time of Day(ToD) IP within the FPGA DFL bus is exposed
> > as PTP Hardware clock(PHC) device to the Linux PTP stack to
> > synchronize the system clock to its ToD information using phc2sys
> > utility of the Linux PTP stack. The DFL is a hardware List within
> > FPGA, which defines a linked list of feature headers within the device
> > MMIO space to provide an extensible way of adding subdevice features.
> >
> > Signed-off-by: Raghavendra Khadatare
> > <raghavendrax.anand.khadatare@intel.com>
> > Signed-off-by: Tianfei Zhang <tianfei.zhang@intel.com>
> > ---
> >  MAINTAINERS               |   7 +
> >  drivers/ptp/Kconfig       |  13 ++
> >  drivers/ptp/Makefile      |   1 +
> >  drivers/ptp/ptp_dfl_tod.c | 334
> > ++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 355 insertions(+)
> >  create mode 100644 drivers/ptp/ptp_dfl_tod.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index
> > ec57c42ed544..584979abbd92 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -15623,6 +15623,13 @@ L:	netdev@vger.kernel.org
> >  S:	Maintained
> >  F:	drivers/ptp/ptp_ocp.c
> >
> > +INTEL PTP DFL ToD DRIVER
> > +M:	Tianfei Zhang <tianfei.zhang@intel.com>
> > +L:	linux-fpga@vger.kernel.org
> > +L:	netdev@vger.kernel.org
> > +S:	Maintained
> > +F:	drivers/ptp/ptp_dfl_tod.c
> > +
> >  OPENCORES I2C BUS DRIVER
> >  M:	Peter Korsgaard <peter@korsgaard.com>
> >  M:	Andrew Lunn <andrew@lunn.ch>
> > diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index
> > fe4971b65c64..e0d6f136ee46 100644
> > --- a/drivers/ptp/Kconfig
> > +++ b/drivers/ptp/Kconfig
> > @@ -186,4 +186,17 @@ config PTP_1588_CLOCK_OCP
> >
> >  	  More information is available at http://www.timingcard.com/
> >
> > +config PTP_DFL_TOD
> > +	tristate "FPGA DFL ToD Driver"
> > +	depends on FPGA_DFL
> > +	help
> > +	  The DFL (Device Feature List) device driver for the Intel ToD
> > +	  (Time-of-Day) device in FPGA card. The ToD IP within the FPGA
> > +	  is exposed as PTP Hardware Clock (PHC) device to the Linux PTP
> > +	  stack to synchronize the system clock to its ToD information
> > +	  using phc2sys utility of the Linux PTP stack.
> > +
> > +	  To compile this driver as a module, choose M here: the module
> > +	  will be called ptp_dfl_tod.
> > +
> >  endmenu
> > diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index
> > 28a6fe342d3e..553f18bf3c83 100644
> > --- a/drivers/ptp/Makefile
> > +++ b/drivers/ptp/Makefile
> > @@ -18,3 +18,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_IDTCM)	+=
> ptp_clockmatrix.o
> >  obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33)	+= ptp_idt82p33.o
> >  obj-$(CONFIG_PTP_1588_CLOCK_VMW)	+= ptp_vmw.o
> >  obj-$(CONFIG_PTP_1588_CLOCK_OCP)	+= ptp_ocp.o
> > +obj-$(CONFIG_PTP_DFL_TOD)		+= ptp_dfl_tod.o
> > diff --git a/drivers/ptp/ptp_dfl_tod.c b/drivers/ptp/ptp_dfl_tod.c new
> > file mode 100644 index 000000000000..d9fbdfc53bd8
> > --- /dev/null
> > +++ b/drivers/ptp/ptp_dfl_tod.c
> > @@ -0,0 +1,334 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * DFL device driver for Time-of-Day (ToD) private feature
> > + *
> > + * Copyright (C) 2023 Intel Corporation  */
> > +
> > +#include <linux/bitfield.h>
> > +#include <linux/delay.h>
> > +#include <linux/dfl.h>
> > +#include <linux/gcd.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/module.h>
> > +#include <linux/ptp_clock_kernel.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/units.h>
> > +
> > +#define FME_FEATURE_ID_TOD		0x22
> > +
> > +/* ToD clock register space. */
> > +#define TOD_CLK_FREQ			0x038
> > +
> > +/*
> > + * The read sequence of ToD timestamp registers: TOD_NANOSEC,
> > +TOD_SECONDSL and
> > + * TOD_SECONDSH, because there is a hardware snapshot whenever the
> > +TOD_NANOSEC
> > + * register is read.
> > + *
> > + * The ToD IP requires writing registers in the reverse order to the read sequence.
> > + * The timestamp is corrected when the TOD_NANOSEC register is
> > +written, so the
> > + * sequence of write TOD registers: TOD_SECONDSH, TOD_SECONDSL and
> TOD_NANOSEC.
> > + */
> > +#define TOD_SECONDSH			0x100
> > +#define TOD_SECONDSL			0x104
> > +#define TOD_NANOSEC			0x108
> > +#define TOD_PERIOD			0x110
> > +#define TOD_ADJUST_PERIOD		0x114
> > +#define TOD_ADJUST_COUNT		0x118
> > +#define TOD_DRIFT_ADJUST		0x11c
> > +#define TOD_DRIFT_ADJUST_RATE		0x120
> > +#define PERIOD_FRAC_OFFSET		16
> > +#define SECONDS_MSB			GENMASK_ULL(47, 32)
> > +#define SECONDS_LSB			GENMASK_ULL(31, 0)
> > +#define TOD_SECONDSH_SEC_MSB		GENMASK_ULL(15, 0)
> > +
> > +#define CAL_SECONDS(m, l)		((FIELD_GET(TOD_SECONDSH_SEC_MSB,
> (m)) << 32) | (l))
> > +
> > +#define TOD_PERIOD_MASK		GENMASK_ULL(19, 0)
> > +#define TOD_PERIOD_MAX			FIELD_MAX(TOD_PERIOD_MASK)
> > +#define TOD_PERIOD_MIN			0
> > +#define TOD_DRIFT_ADJUST_MASK		GENMASK_ULL(15, 0)
> > +#define TOD_DRIFT_ADJUST_FNS_MAX
> 	FIELD_MAX(TOD_DRIFT_ADJUST_MASK)
> > +#define TOD_DRIFT_ADJUST_RATE_MAX	TOD_DRIFT_ADJUST_FNS_MAX
> > +#define TOD_ADJUST_COUNT_MASK		GENMASK_ULL(19, 0)
> > +#define TOD_ADJUST_COUNT_MAX
> 	FIELD_MAX(TOD_ADJUST_COUNT_MASK)
> > +#define TOD_ADJUST_INTERVAL_US		1000
> > +#define TOD_ADJUST_MS			\
> > +		(((TOD_PERIOD_MAX >> 16) + 1) * (TOD_ADJUST_COUNT_MAX + 1))
> > +#define TOD_ADJUST_MS_MAX		(TOD_ADJUST_MS / MICRO)
> > +#define TOD_ADJUST_MAX_US		(TOD_ADJUST_MS_MAX *
> USEC_PER_MSEC)
> > +#define TOD_MAX_ADJ			(500 * MEGA)
> > +
> > +struct dfl_tod {
> > +	struct ptp_clock_info ptp_clock_ops;
> > +	struct device *dev;
> > +	struct ptp_clock *ptp_clock;
> > +
> > +	/* ToD Clock address space */
> > +	void __iomem *tod_ctrl;
> > +
> > +	/* ToD clock registers protection */
> > +	spinlock_t tod_lock;
> > +};
> > +
> > +/*
> > + * A fine ToD HW clock offset adjustment. To perform the fine offset
> > +adjustment, the
> > + * adjust_period and adjust_count argument are used to update the
> > +TOD_ADJUST_PERIOD
> > + * and TOD_ADJUST_COUNT register for in hardware. The dt->tod_lock
> > +spinlock must be
> > + * held when calling this function.
> > + */
> 
> Calling this function while holding the dt->tod_lock spinlock might cause an error
> since read_poll_timeout() can sleep. If it is required to use a spinlock, there is the
> readl_poll_timeout_atomic() function, which can be called from atomic context.
> However, in this case, it is probably better to use a mutex instead of a spinlock since
> the delay appears to be 1 ms.

To program the TOD registers needs to be done without interruption to ensure the correct values are programmed,
So I think using readl_poll_timeout_atomic() function here is better. This delay should be very faster, the 10 us interval delay
will be better.

> 
> > +static int fine_adjust_tod_clock(struct dfl_tod *dt, u32 adjust_period,
> > +				 u32 adjust_count)
> > +{
> > +	void __iomem *base = dt->tod_ctrl;
> > +	u32 val;
> > +
> > +	writel(adjust_period, base + TOD_ADJUST_PERIOD);
> > +	writel(adjust_count, base + TOD_ADJUST_COUNT);
> > +
> > +	/* Wait for present offset adjustment update to complete */
> > +	return readl_poll_timeout(base + TOD_ADJUST_COUNT, val, !val,
> TOD_ADJUST_INTERVAL_US,
> > +				  TOD_ADJUST_MAX_US);
> > +}
> > +
> > +/*
> > + * A coarse ToD HW clock offset adjustment.
> > + * The coarse time adjustment performs by adding or subtracting the
> > +delta value
> > + * from the current ToD HW clock time.
> > + */
> > +static int coarse_adjust_tod_clock(struct dfl_tod *dt, s64 delta) {
> > +	u32 seconds_msb, seconds_lsb, nanosec;
> > +	void __iomem *base = dt->tod_ctrl;
> > +	u64 seconds, now;
> > +
> > +	if (delta == 0)
> > +		return 0;
> > +
> > +	nanosec = readl(base + TOD_NANOSEC);
> > +	seconds_lsb = readl(base + TOD_SECONDSL);
> > +	seconds_msb = readl(base + TOD_SECONDSH);
> > +
> > +	/* Calculate new time */
> > +	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
> > +	now = seconds * NSEC_PER_SEC + nanosec + delta;
> > +
> > +	seconds = div_u64_rem(now, NSEC_PER_SEC, &nanosec);
> > +	seconds_msb = FIELD_GET(SECONDS_MSB, seconds);
> > +	seconds_lsb = FIELD_GET(SECONDS_LSB, seconds);
> > +
> > +	writel(seconds_msb, base + TOD_SECONDSH);
> > +	writel(seconds_lsb, base + TOD_SECONDSL);
> > +	writel(nanosec, base + TOD_NANOSEC);
> > +
> > +	return 0;
> > +}
> > +
> > +static int dfl_tod_adjust_fine(struct ptp_clock_info *ptp, long
> > +scaled_ppm) {
> > +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> > +	u32 tod_period, tod_rem, tod_drift_adjust_fns, tod_drift_adjust_rate;
> > +	void __iomem *base = dt->tod_ctrl;
> > +	unsigned long flags, rate;
> > +	u64 ppb;
> > +
> > +	/* Get the clock rate from clock frequency register offset */
> > +	rate = readl(base + TOD_CLK_FREQ);
> > +
> > +	/* add GIGA as nominal ppb */
> > +	ppb = scaled_ppm_to_ppb(scaled_ppm) + GIGA;
> > +
> > +	tod_period = div_u64_rem(ppb << PERIOD_FRAC_OFFSET, rate, &tod_rem);
> > +	if (tod_period > TOD_PERIOD_MAX)
> > +		return -ERANGE;
> > +
> > +	/*
> > +	 * The drift of ToD adjusted periodically by adding a drift_adjust_fns
> > +	 * correction value every drift_adjust_rate count of clock cycles.
> > +	 */
> > +	tod_drift_adjust_fns = tod_rem / gcd(tod_rem, rate);
> > +	tod_drift_adjust_rate = rate / gcd(tod_rem, rate);
> > +
> > +	while ((tod_drift_adjust_fns > TOD_DRIFT_ADJUST_FNS_MAX) ||
> > +	       (tod_drift_adjust_rate > TOD_DRIFT_ADJUST_RATE_MAX)) {
> > +		tod_drift_adjust_fns >>= 1;
> > +		tod_drift_adjust_rate >>= 1;
> > +	}
> 
> Why both tod_drift_adjust_fns and tod_drift_adjust_rate are iteratively divided if
> one of the two exceeds the maximum value? Wouldn't it be more accurate to set
> each of them to the max if they exceeded their respective maximum value?

Thanks your point out, calculate the tod_drift_adjust_fns and tod_drift_adjust_rate separately is better.

> 
> > +
> > +	if (tod_drift_adjust_fns == 0)
> > +		tod_drift_adjust_rate = 0;
> > +
> > +	spin_lock_irqsave(&dt->tod_lock, flags);
> > +	writel(tod_period, base + TOD_PERIOD);
> > +	writel(0, base + TOD_ADJUST_PERIOD);
> > +	writel(0, base + TOD_ADJUST_COUNT);
> > +	writel(tod_drift_adjust_fns, base + TOD_DRIFT_ADJUST);
> > +	writel(tod_drift_adjust_rate, base + TOD_DRIFT_ADJUST_RATE);
> > +	spin_unlock_irqrestore(&dt->tod_lock, flags);
> > +
> > +	return 0;
> > +}
> > +
> > +static int dfl_tod_adjust_time(struct ptp_clock_info *ptp, s64 delta)
> > +{
> > +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> > +	u32 period, diff, rem, rem_period, adj_period;
> > +	void __iomem *base = dt->tod_ctrl;
> > +	unsigned long flags;
> > +	bool neg_adj;
> > +	u64 count;
> > +	int ret;
> > +
> > +	neg_adj = delta < 0;
> > +	if (neg_adj)
> > +		delta = -delta;
> > +
> > +	spin_lock_irqsave(&dt->tod_lock, flags);
> > +
> > +	/*
> > +	 * Get the maximum possible value of the Period register offset
> > +	 * adjustment in nanoseconds scale. This depends on the current
> > +	 * Period register setting and the maximum and minimum possible
> > +	 * values of the Period register.
> > +	 */
> > +	period = readl(base + TOD_PERIOD);
> > +
> > +	if (neg_adj) {
> > +		diff = (period - TOD_PERIOD_MIN) >> PERIOD_FRAC_OFFSET;
> > +		adj_period = period - (diff << PERIOD_FRAC_OFFSET);
> > +		rem_period = period - (rem << PERIOD_FRAC_OFFSET);
> 
> rem seems to be uninitialized here.

Yes, the rem should be calculated  in div_u64_rem(), I will change to code like this:

        period = readl(base + TOD_PERIOD);

        if (neg_adj) {
                diff = (period - TOD_PERIOD_MIN) >> PERIOD_FRAC_OFFSET;
                adj_period = period - (diff << PERIOD_FRAC_OFFSET);
                count = div_u64_rem(delta, diff, &rem);
                rem_period = period - (rem << PERIOD_FRAC_OFFSET);
        } else {
                diff = (TOD_PERIOD_MAX - period) >> PERIOD_FRAC_OFFSET;
                adj_period = period + (diff << PERIOD_FRAC_OFFSET);
                count = div_u64_rem(delta, diff, &rem);
                rem_period = period + (rem << PERIOD_FRAC_OFFSET);
        }

        ret = 0;

        if (count > TOD_ADJUST_COUNT_MAX) {
                     ....
Andy Shevchenko March 15, 2023, 1:59 p.m. UTC | #8
+Cc: Nicolas

On Tue, Mar 14, 2023 at 12:46:48PM -0700, Richard Cochran wrote:
> On Tue, Mar 14, 2023 at 12:47:03PM +0200, Andy Shevchenko wrote:
> > The semantics of the above is similar to gpiod_get_optional() and since NULL
> > is a valid return in such cases, the PTP has to handle this transparently to
> > the user. Otherwise it's badly designed API which has to be fixed.
> 
> Does it now?  Whatever.
> 
> > TL;DR: If I'm mistaken, I would like to know why.
> 
> git log.  git blame.
> 
> Get to know the tools of trade.

So, the culprit seems the commit d1cbfd771ce8 ("ptp_clock: Allow for it
to be optional") which did it half way.

Now I would like to know why the good idea got bad implementation.

Nicolas?
Nicolas Pitre March 15, 2023, 2:37 p.m. UTC | #9
On Wed, 15 Mar 2023, Andy Shevchenko wrote:

> +Cc: Nicolas
> 
> On Tue, Mar 14, 2023 at 12:46:48PM -0700, Richard Cochran wrote:
> > On Tue, Mar 14, 2023 at 12:47:03PM +0200, Andy Shevchenko wrote:
> > > The semantics of the above is similar to gpiod_get_optional() and since NULL
> > > is a valid return in such cases, the PTP has to handle this transparently to
> > > the user. Otherwise it's badly designed API which has to be fixed.
> > 
> > Does it now?  Whatever.
> > 
> > > TL;DR: If I'm mistaken, I would like to know why.
> > 
> > git log.  git blame.
> > 
> > Get to know the tools of trade.
> 
> So, the culprit seems the commit d1cbfd771ce8 ("ptp_clock: Allow for it
> to be optional") which did it half way.
> 
> Now I would like to know why the good idea got bad implementation.
> 
> Nicolas?

I'd be happy to help but as presented I simply don't know what you're 
talking about. Please give me more context.


Nicolas
Zhang, Tianfei March 16, 2023, 3:03 p.m. UTC | #10
> -----Original Message-----
> From: Zhang, Tianfei
> Sent: Wednesday, March 15, 2023 10:59 AM
> To: Marco Pagani <marpagan@redhat.com>
> Cc: ilpo.jarvinen@linux.intel.com; andriy.shevchenko@linux.intel.com; Weight,
> Russell H <russell.h.weight@intel.com>; matthew.gerlach@linux.intel.com; pierre-
> louis.bossart@linux.intel.com; Gomes, Vinicius <vinicius.gomes@intel.com>;
> Khadatare, RaghavendraX Anand <raghavendrax.anand.khadatare@intel.com>;
> netdev@vger.kernel.org; linux-fpga@vger.kernel.org; richardcochran@gmail.com
> Subject: RE: [PATCH v1] ptp: add ToD device driver for Intel FPGA cards
> 
> 
> 
> > -----Original Message-----
> > From: Marco Pagani <marpagan@redhat.com>
> > Sent: Tuesday, March 14, 2023 1:50 AM
> > To: Zhang, Tianfei <tianfei.zhang@intel.com>
> > Cc: ilpo.jarvinen@linux.intel.com; andriy.shevchenko@linux.intel.com;
> > Weight, Russell H <russell.h.weight@intel.com>;
> > matthew.gerlach@linux.intel.com; pierre-
> > louis.bossart@linux.intel.com; Gomes, Vinicius
> > <vinicius.gomes@intel.com>; Khadatare, RaghavendraX Anand
> > <raghavendrax.anand.khadatare@intel.com>;
> > netdev@vger.kernel.org; linux-fpga@vger.kernel.org;
> > richardcochran@gmail.com
> > Subject: Re: [PATCH v1] ptp: add ToD device driver for Intel FPGA
> > cards
> >
> >
> >
> > On 2023-03-13 04:02, Tianfei Zhang wrote:
> > > Adding a DFL (Device Feature List) device driver of ToD device for
> > > Intel FPGA cards.
> > >
> > > The Intel FPGA Time of Day(ToD) IP within the FPGA DFL bus is
> > > exposed as PTP Hardware clock(PHC) device to the Linux PTP stack to
> > > synchronize the system clock to its ToD information using phc2sys
> > > utility of the Linux PTP stack. The DFL is a hardware List within
> > > FPGA, which defines a linked list of feature headers within the
> > > device MMIO space to provide an extensible way of adding subdevice features.
> > >
> > > Signed-off-by: Raghavendra Khadatare
> > > <raghavendrax.anand.khadatare@intel.com>
> > > Signed-off-by: Tianfei Zhang <tianfei.zhang@intel.com>
> > > ---
> > >  MAINTAINERS               |   7 +
> > >  drivers/ptp/Kconfig       |  13 ++
> > >  drivers/ptp/Makefile      |   1 +
> > >  drivers/ptp/ptp_dfl_tod.c | 334
> > > ++++++++++++++++++++++++++++++++++++++
> > >  4 files changed, 355 insertions(+)
> > >  create mode 100644 drivers/ptp/ptp_dfl_tod.c
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS index
> > > ec57c42ed544..584979abbd92 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -15623,6 +15623,13 @@ L:	netdev@vger.kernel.org
> > >  S:	Maintained
> > >  F:	drivers/ptp/ptp_ocp.c
> > >
> > > +INTEL PTP DFL ToD DRIVER
> > > +M:	Tianfei Zhang <tianfei.zhang@intel.com>
> > > +L:	linux-fpga@vger.kernel.org
> > > +L:	netdev@vger.kernel.org
> > > +S:	Maintained
> > > +F:	drivers/ptp/ptp_dfl_tod.c
> > > +
> > >  OPENCORES I2C BUS DRIVER
> > >  M:	Peter Korsgaard <peter@korsgaard.com>
> > >  M:	Andrew Lunn <andrew@lunn.ch>
> > > diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index
> > > fe4971b65c64..e0d6f136ee46 100644
> > > --- a/drivers/ptp/Kconfig
> > > +++ b/drivers/ptp/Kconfig
> > > @@ -186,4 +186,17 @@ config PTP_1588_CLOCK_OCP
> > >
> > >  	  More information is available at http://www.timingcard.com/
> > >
> > > +config PTP_DFL_TOD
> > > +	tristate "FPGA DFL ToD Driver"
> > > +	depends on FPGA_DFL
> > > +	help
> > > +	  The DFL (Device Feature List) device driver for the Intel ToD
> > > +	  (Time-of-Day) device in FPGA card. The ToD IP within the FPGA
> > > +	  is exposed as PTP Hardware Clock (PHC) device to the Linux PTP
> > > +	  stack to synchronize the system clock to its ToD information
> > > +	  using phc2sys utility of the Linux PTP stack.
> > > +
> > > +	  To compile this driver as a module, choose M here: the module
> > > +	  will be called ptp_dfl_tod.
> > > +
> > >  endmenu
> > > diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index
> > > 28a6fe342d3e..553f18bf3c83 100644
> > > --- a/drivers/ptp/Makefile
> > > +++ b/drivers/ptp/Makefile
> > > @@ -18,3 +18,4 @@ obj-$(CONFIG_PTP_1588_CLOCK_IDTCM)	+=
> > ptp_clockmatrix.o
> > >  obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33)	+= ptp_idt82p33.o
> > >  obj-$(CONFIG_PTP_1588_CLOCK_VMW)	+= ptp_vmw.o
> > >  obj-$(CONFIG_PTP_1588_CLOCK_OCP)	+= ptp_ocp.o
> > > +obj-$(CONFIG_PTP_DFL_TOD)		+= ptp_dfl_tod.o
> > > diff --git a/drivers/ptp/ptp_dfl_tod.c b/drivers/ptp/ptp_dfl_tod.c
> > > new file mode 100644 index 000000000000..d9fbdfc53bd8
> > > --- /dev/null
> > > +++ b/drivers/ptp/ptp_dfl_tod.c
> > > @@ -0,0 +1,334 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * DFL device driver for Time-of-Day (ToD) private feature
> > > + *
> > > + * Copyright (C) 2023 Intel Corporation  */
> > > +
> > > +#include <linux/bitfield.h>
> > > +#include <linux/delay.h>
> > > +#include <linux/dfl.h>
> > > +#include <linux/gcd.h>
> > > +#include <linux/iopoll.h>
> > > +#include <linux/module.h>
> > > +#include <linux/ptp_clock_kernel.h> #include <linux/spinlock.h>
> > > +#include <linux/units.h>
> > > +
> > > +#define FME_FEATURE_ID_TOD		0x22
> > > +
> > > +/* ToD clock register space. */
> > > +#define TOD_CLK_FREQ			0x038
> > > +
> > > +/*
> > > + * The read sequence of ToD timestamp registers: TOD_NANOSEC,
> > > +TOD_SECONDSL and
> > > + * TOD_SECONDSH, because there is a hardware snapshot whenever the
> > > +TOD_NANOSEC
> > > + * register is read.
> > > + *
> > > + * The ToD IP requires writing registers in the reverse order to the read
> sequence.
> > > + * The timestamp is corrected when the TOD_NANOSEC register is
> > > +written, so the
> > > + * sequence of write TOD registers: TOD_SECONDSH, TOD_SECONDSL and
> > TOD_NANOSEC.
> > > + */
> > > +#define TOD_SECONDSH			0x100
> > > +#define TOD_SECONDSL			0x104
> > > +#define TOD_NANOSEC			0x108
> > > +#define TOD_PERIOD			0x110
> > > +#define TOD_ADJUST_PERIOD		0x114
> > > +#define TOD_ADJUST_COUNT		0x118
> > > +#define TOD_DRIFT_ADJUST		0x11c
> > > +#define TOD_DRIFT_ADJUST_RATE		0x120
> > > +#define PERIOD_FRAC_OFFSET		16
> > > +#define SECONDS_MSB			GENMASK_ULL(47, 32)
> > > +#define SECONDS_LSB			GENMASK_ULL(31, 0)
> > > +#define TOD_SECONDSH_SEC_MSB		GENMASK_ULL(15, 0)
> > > +
> > > +#define CAL_SECONDS(m, l)		((FIELD_GET(TOD_SECONDSH_SEC_MSB,
> > (m)) << 32) | (l))
> > > +
> > > +#define TOD_PERIOD_MASK		GENMASK_ULL(19, 0)
> > > +#define TOD_PERIOD_MAX			FIELD_MAX(TOD_PERIOD_MASK)
> > > +#define TOD_PERIOD_MIN			0
> > > +#define TOD_DRIFT_ADJUST_MASK		GENMASK_ULL(15, 0)
> > > +#define TOD_DRIFT_ADJUST_FNS_MAX
> > 	FIELD_MAX(TOD_DRIFT_ADJUST_MASK)
> > > +#define TOD_DRIFT_ADJUST_RATE_MAX	TOD_DRIFT_ADJUST_FNS_MAX
> > > +#define TOD_ADJUST_COUNT_MASK		GENMASK_ULL(19, 0)
> > > +#define TOD_ADJUST_COUNT_MAX
> > 	FIELD_MAX(TOD_ADJUST_COUNT_MASK)
> > > +#define TOD_ADJUST_INTERVAL_US		1000
> > > +#define TOD_ADJUST_MS			\
> > > +		(((TOD_PERIOD_MAX >> 16) + 1) * (TOD_ADJUST_COUNT_MAX + 1))
> > > +#define TOD_ADJUST_MS_MAX		(TOD_ADJUST_MS / MICRO)
> > > +#define TOD_ADJUST_MAX_US		(TOD_ADJUST_MS_MAX *
> > USEC_PER_MSEC)
> > > +#define TOD_MAX_ADJ			(500 * MEGA)
> > > +
> > > +struct dfl_tod {
> > > +	struct ptp_clock_info ptp_clock_ops;
> > > +	struct device *dev;
> > > +	struct ptp_clock *ptp_clock;
> > > +
> > > +	/* ToD Clock address space */
> > > +	void __iomem *tod_ctrl;
> > > +
> > > +	/* ToD clock registers protection */
> > > +	spinlock_t tod_lock;
> > > +};
> > > +
> > > +/*
> > > + * A fine ToD HW clock offset adjustment. To perform the fine
> > > +offset adjustment, the
> > > + * adjust_period and adjust_count argument are used to update the
> > > +TOD_ADJUST_PERIOD
> > > + * and TOD_ADJUST_COUNT register for in hardware. The dt->tod_lock
> > > +spinlock must be
> > > + * held when calling this function.
> > > + */
> >
> > Calling this function while holding the dt->tod_lock spinlock might
> > cause an error since read_poll_timeout() can sleep. If it is required
> > to use a spinlock, there is the
> > readl_poll_timeout_atomic() function, which can be called from atomic context.
> > However, in this case, it is probably better to use a mutex instead of
> > a spinlock since the delay appears to be 1 ms.
> 
> To program the TOD registers needs to be done without interruption to ensure the
> correct values are programmed, So I think using readl_poll_timeout_atomic()
> function here is better. This delay should be very faster, the 10 us interval delay will
> be better.
> 
> >
> > > +static int fine_adjust_tod_clock(struct dfl_tod *dt, u32 adjust_period,
> > > +				 u32 adjust_count)
> > > +{
> > > +	void __iomem *base = dt->tod_ctrl;
> > > +	u32 val;
> > > +
> > > +	writel(adjust_period, base + TOD_ADJUST_PERIOD);
> > > +	writel(adjust_count, base + TOD_ADJUST_COUNT);
> > > +
> > > +	/* Wait for present offset adjustment update to complete */
> > > +	return readl_poll_timeout(base + TOD_ADJUST_COUNT, val, !val,
> > TOD_ADJUST_INTERVAL_US,
> > > +				  TOD_ADJUST_MAX_US);
> > > +}
> > > +
> > > +/*
> > > + * A coarse ToD HW clock offset adjustment.
> > > + * The coarse time adjustment performs by adding or subtracting the
> > > +delta value
> > > + * from the current ToD HW clock time.
> > > + */
> > > +static int coarse_adjust_tod_clock(struct dfl_tod *dt, s64 delta) {
> > > +	u32 seconds_msb, seconds_lsb, nanosec;
> > > +	void __iomem *base = dt->tod_ctrl;
> > > +	u64 seconds, now;
> > > +
> > > +	if (delta == 0)
> > > +		return 0;
> > > +
> > > +	nanosec = readl(base + TOD_NANOSEC);
> > > +	seconds_lsb = readl(base + TOD_SECONDSL);
> > > +	seconds_msb = readl(base + TOD_SECONDSH);
> > > +
> > > +	/* Calculate new time */
> > > +	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
> > > +	now = seconds * NSEC_PER_SEC + nanosec + delta;
> > > +
> > > +	seconds = div_u64_rem(now, NSEC_PER_SEC, &nanosec);
> > > +	seconds_msb = FIELD_GET(SECONDS_MSB, seconds);
> > > +	seconds_lsb = FIELD_GET(SECONDS_LSB, seconds);
> > > +
> > > +	writel(seconds_msb, base + TOD_SECONDSH);
> > > +	writel(seconds_lsb, base + TOD_SECONDSL);
> > > +	writel(nanosec, base + TOD_NANOSEC);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int dfl_tod_adjust_fine(struct ptp_clock_info *ptp, long
> > > +scaled_ppm) {
> > > +	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
> > > +	u32 tod_period, tod_rem, tod_drift_adjust_fns, tod_drift_adjust_rate;
> > > +	void __iomem *base = dt->tod_ctrl;
> > > +	unsigned long flags, rate;
> > > +	u64 ppb;
> > > +
> > > +	/* Get the clock rate from clock frequency register offset */
> > > +	rate = readl(base + TOD_CLK_FREQ);
> > > +
> > > +	/* add GIGA as nominal ppb */
> > > +	ppb = scaled_ppm_to_ppb(scaled_ppm) + GIGA;
> > > +
> > > +	tod_period = div_u64_rem(ppb << PERIOD_FRAC_OFFSET, rate, &tod_rem);
> > > +	if (tod_period > TOD_PERIOD_MAX)
> > > +		return -ERANGE;
> > > +
> > > +	/*
> > > +	 * The drift of ToD adjusted periodically by adding a drift_adjust_fns
> > > +	 * correction value every drift_adjust_rate count of clock cycles.
> > > +	 */
> > > +	tod_drift_adjust_fns = tod_rem / gcd(tod_rem, rate);
> > > +	tod_drift_adjust_rate = rate / gcd(tod_rem, rate);
> > > +
> > > +	while ((tod_drift_adjust_fns > TOD_DRIFT_ADJUST_FNS_MAX) ||
> > > +	       (tod_drift_adjust_rate > TOD_DRIFT_ADJUST_RATE_MAX)) {
> > > +		tod_drift_adjust_fns >>= 1;
> > > +		tod_drift_adjust_rate >>= 1;
> > > +	}
> >
> > Why both tod_drift_adjust_fns and tod_drift_adjust_rate are
> > iteratively divided if one of the two exceeds the maximum value?
> > Wouldn't it be more accurate to set each of them to the max if they exceeded their
> respective maximum value?
> 
> Thanks your point out, calculate the tod_drift_adjust_fns and tod_drift_adjust_rate
> separately is better.
> 

Sorry, my last comment is not correct,  I like to add more detail information about this code.

This piece of code adjusts for the non-representable part of the fractional nanosecond part of the target ToD period that corresponds to the nominal or servo-steered frequency.  For example, the clock period for a 10G application (156.25MHz) is 6.4ns. The hexadecimal representation of the integer part (6ns) is 0x6, and the fractional nanosecond part is 0.4 fns (fractional nanosecond), whose 16-bit hexadecimal representation is:

Fractional nanosecond field is 16 bits wide: 0.4 fns = 0.4 * 2^16 = 26214.4 in decimal.
Converting to hexadecimal: 26214 + 0.4 = 0x6666 + 0x0000.4 = 0x6666.4 fns.  

The 16bits register can represent 0x6666, but not 0.4fns. This would cause a systematic drift in the clock equal to 953.6 ns every 1 second.  To compensate for that, the hardware ToD module uses two registers, namely TOD_DRIFT_ADJUST and TOD_DRIFT_ADJUST_RATE. The non-representable 0.4 fns per clock cycle equals to 2fns per 5 clock cycles, both of which are integer and representable. 

Hence is the TOD_DRIFT_ADJUST  and TOD_DRIFT_ADJUST_RATE registers as follow:

TOD_DRIFT_ADJUST = 0x02
TOD_DRIFT_ADJUST_RATE = 0x5.

The while loop divides the higher-precision result of the divisions by 2 in sequential steps, until both tod_drift_adjust_fns and tod_drift_adjust_rate are representable within their hardware register precision and dynamic range. The while loop has a deterministic upper bound. 

Here is the ToD specification:
https://www.intel.com/content/www/us/en/docs/programmable/683044/21-4/adjusting-tod-drift.html
Andy Shevchenko March 20, 2023, 1:20 p.m. UTC | #11
On Wed, Mar 15, 2023 at 10:37:58AM -0400, Nicolas Pitre wrote:
> On Wed, 15 Mar 2023, Andy Shevchenko wrote:
> > On Tue, Mar 14, 2023 at 12:46:48PM -0700, Richard Cochran wrote:
> > > On Tue, Mar 14, 2023 at 12:47:03PM +0200, Andy Shevchenko wrote:
> > > > The semantics of the above is similar to gpiod_get_optional() and since NULL
> > > > is a valid return in such cases, the PTP has to handle this transparently to
> > > > the user. Otherwise it's badly designed API which has to be fixed.
> > > 
> > > Does it now?  Whatever.
> > > 
> > > > TL;DR: If I'm mistaken, I would like to know why.
> > > 
> > > git log.  git blame.
> > > 
> > > Get to know the tools of trade.
> > 
> > So, the culprit seems the commit d1cbfd771ce8 ("ptp_clock: Allow for it
> > to be optional") which did it half way.
> > 
> > Now I would like to know why the good idea got bad implementation.
> > 
> > Nicolas?
> 
> I'd be happy to help but as presented I simply don't know what you're 
> talking about. Please give me more context.

When your change introduced the optionality of the above mentioned API,
i.e. ptp_clock_register(), the function started returning NULL, which
is fine. What's not in my opinion is to ask individual drivers to handle it.
That said, if we take a look at gpiod_*_optional() or clk_*_optional()
we may notice that they handle NULL as a valid parameter (object) to their
respective APIs and individual drivers shouldn't take care about that.

Why PTP is so special?
Nicolas Pitre March 20, 2023, 1:43 p.m. UTC | #12
On Mon, 20 Mar 2023, Andy Shevchenko wrote:

> On Wed, Mar 15, 2023 at 10:37:58AM -0400, Nicolas Pitre wrote:
> > On Wed, 15 Mar 2023, Andy Shevchenko wrote:
> > > On Tue, Mar 14, 2023 at 12:46:48PM -0700, Richard Cochran wrote:
> > > > On Tue, Mar 14, 2023 at 12:47:03PM +0200, Andy Shevchenko wrote:
> > > > > The semantics of the above is similar to gpiod_get_optional() and since NULL
> > > > > is a valid return in such cases, the PTP has to handle this transparently to
> > > > > the user. Otherwise it's badly designed API which has to be fixed.
> > > > 
> > > > Does it now?  Whatever.
> > > > 
> > > > > TL;DR: If I'm mistaken, I would like to know why.
> > > > 
> > > > git log.  git blame.
> > > > 
> > > > Get to know the tools of trade.
> > > 
> > > So, the culprit seems the commit d1cbfd771ce8 ("ptp_clock: Allow for it
> > > to be optional") which did it half way.
> > > 
> > > Now I would like to know why the good idea got bad implementation.
> > > 
> > > Nicolas?
> > 
> > I'd be happy to help but as presented I simply don't know what you're 
> > talking about. Please give me more context.
> 
> When your change introduced the optionality of the above mentioned API,
> i.e. ptp_clock_register(), the function started returning NULL, which
> is fine. What's not in my opinion is to ask individual drivers to handle it.
> That said, if we take a look at gpiod_*_optional() or clk_*_optional()
> we may notice that they handle NULL as a valid parameter (object) to their
> respective APIs and individual drivers shouldn't take care about that.
> 
> Why PTP is so special?

To my knowledge it is not.

The current arrangement has apparently worked well for more than 6 
years. If you see a better way you're welcome to submit patches as 
usual.

Alternatively the above commit can be reverted if no one else 
cares. I personally gave up on the idea of a slimmed down Linux kernel a 
while ago.


Nicolas
Richard Cochran March 20, 2023, 7:41 p.m. UTC | #13
On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:

> Alternatively the above commit can be reverted if no one else 
> cares. I personally gave up on the idea of a slimmed down Linux kernel a 
> while ago.

Does this mean I can restore the posix clocks back into the core
unconditionally?

That would be awesome.

Thanks,
Richard
Nicolas Pitre March 20, 2023, 8:53 p.m. UTC | #14
On Mon, 20 Mar 2023, Richard Cochran wrote:

> On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:
> 
> > Alternatively the above commit can be reverted if no one else 
> > cares. I personally gave up on the idea of a slimmed down Linux kernel a 
> > while ago.
> 
> Does this mean I can restore the posix clocks back into the core
> unconditionally?

This only means _I_ no longer care. I'm not speaking for others (e.g. 
OpenWRT or the like) who might still rely on splitting it out.
Maybe Andy wants to "fix" it?


Nicolas
Andy Shevchenko March 21, 2023, 1:02 p.m. UTC | #15
On Mon, Mar 20, 2023 at 04:53:07PM -0400, Nicolas Pitre wrote:
> On Mon, 20 Mar 2023, Richard Cochran wrote:
> 
> > On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:
> > 
> > > Alternatively the above commit can be reverted if no one else 
> > > cares. I personally gave up on the idea of a slimmed down Linux kernel a 
> > > while ago.
> > 
> > Does this mean I can restore the posix clocks back into the core
> > unconditionally?
> 
> This only means _I_ no longer care. I'm not speaking for others (e.g. 
> OpenWRT or the like) who might still rely on splitting it out.
> Maybe Andy wants to "fix" it?

I would be a good choice, if I have an access to the hardware at hand to test.
That said, I think Richard himself can try to finish that feature (optional PTP)
and on my side I can help with reviewing the code (just Cc me when needed).
Zhang, Tianfei March 21, 2023, 2:28 p.m. UTC | #16
> -----Original Message-----
> From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Sent: Tuesday, March 21, 2023 9:03 PM
> To: Nicolas Pitre <nico@fluxnic.net>
> Cc: Richard Cochran <richardcochran@gmail.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; netdev@vger.kernel.org; linux-fpga@vger.kernel.org;
> ilpo.jarvinen@linux.intel.com; Weight, Russell H <russell.h.weight@intel.com>;
> matthew.gerlach@linux.intel.com; pierre-louis.bossart@linux.intel.com; Gomes,
> Vinicius <vinicius.gomes@intel.com>; Khadatare, RaghavendraX Anand
> <raghavendrax.anand.khadatare@intel.com>
> Subject: Re: [PATCH v1] ptp: add ToD device driver for Intel FPGA cards
> 
> On Mon, Mar 20, 2023 at 04:53:07PM -0400, Nicolas Pitre wrote:
> > On Mon, 20 Mar 2023, Richard Cochran wrote:
> >
> > > On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:
> > >
> > > > Alternatively the above commit can be reverted if no one else
> > > > cares. I personally gave up on the idea of a slimmed down Linux
> > > > kernel a while ago.
> > >
> > > Does this mean I can restore the posix clocks back into the core
> > > unconditionally?
> >
> > This only means _I_ no longer care. I'm not speaking for others (e.g.
> > OpenWRT or the like) who might still rely on splitting it out.
> > Maybe Andy wants to "fix" it?
> 
> I would be a good choice, if I have an access to the hardware at hand to test.
> That said, I think Richard himself can try to finish that feature (optional PTP) and on
> my side I can help with reviewing the code (just Cc me when needed).
> 

Hi Richard, Andy,

Handle NULL as a valid parameter (object) to their respective APIs looks a good idea, but this will be a big change and need fully test for all devices,
we can add it on the TODO list. So for this patch, are you agree we continue use the existing ptp_clock_register() API, for example, change my driver like below:

      dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
        if (IS_ERR_OR_NULL(dt->ptp_clock))
                return dev_err_probe(dt->dev, IS_ERR_OR_NULL(dt->ptp_clock),
                                     "Unable to register PTP clock\n");
Andy Shevchenko March 21, 2023, 2:40 p.m. UTC | #17
On Tue, Mar 21, 2023 at 02:28:15PM +0000, Zhang, Tianfei wrote:
> > From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > Sent: Tuesday, March 21, 2023 9:03 PM
> > To: Nicolas Pitre <nico@fluxnic.net>
> > On Mon, Mar 20, 2023 at 04:53:07PM -0400, Nicolas Pitre wrote:
> > > On Mon, 20 Mar 2023, Richard Cochran wrote:
> > > > On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:
> > > >
> > > > > Alternatively the above commit can be reverted if no one else
> > > > > cares. I personally gave up on the idea of a slimmed down Linux
> > > > > kernel a while ago.
> > > >
> > > > Does this mean I can restore the posix clocks back into the core
> > > > unconditionally?
> > >
> > > This only means _I_ no longer care. I'm not speaking for others (e.g.
> > > OpenWRT or the like) who might still rely on splitting it out.
> > > Maybe Andy wants to "fix" it?
> > 
> > I would be a good choice, if I have an access to the hardware at hand to test.
> > That said, I think Richard himself can try to finish that feature (optional PTP) and on
> > my side I can help with reviewing the code (just Cc me when needed).
> 
> Handle NULL as a valid parameter (object) to their respective APIs looks a
> good idea, but this will be a big change and need fully test for all devices,

Since it's core change, so a few devices that cover 100% APIs that should
handle optional PTP. I don't think it requires enormous work.

> we can add it on the TODO list.

It would be a good idea.

> So for this patch, are you agree we continue use the existing
> ptp_clock_register() API, for example, change my driver like below:

The problem is that it will increase the technical debt.
What about sending with NULL handled variant together with an RFC/RFT
that finishes the optional PTP support?

>       dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);

	ret = PTR_ERR_OR_ZERO(...);
	if (ret)
		return ...

?

>       if (IS_ERR_OR_NULL(dt->ptp_clock))
>               return dev_err_probe(dt->dev, IS_ERR_OR_NULL(dt->ptp_clock),
>                                    "Unable to register PTP clock\n");
Zhang, Tianfei March 21, 2023, 2:52 p.m. UTC | #18
> -----Original Message-----
> From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Sent: Tuesday, March 21, 2023 10:40 PM
> To: Zhang, Tianfei <tianfei.zhang@intel.com>
> Cc: Nicolas Pitre <nico@fluxnic.net>; Richard Cochran
> <richardcochran@gmail.com>; netdev@vger.kernel.org; linux-
> fpga@vger.kernel.org; ilpo.jarvinen@linux.intel.com; Weight, Russell H
> <russell.h.weight@intel.com>; matthew.gerlach@linux.intel.com; pierre-
> louis.bossart@linux.intel.com; Gomes, Vinicius <vinicius.gomes@intel.com>;
> Khadatare, RaghavendraX Anand <raghavendrax.anand.khadatare@intel.com>
> Subject: Re: [PATCH v1] ptp: add ToD device driver for Intel FPGA cards
> 
> On Tue, Mar 21, 2023 at 02:28:15PM +0000, Zhang, Tianfei wrote:
> > > From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > > Sent: Tuesday, March 21, 2023 9:03 PM
> > > To: Nicolas Pitre <nico@fluxnic.net> On Mon, Mar 20, 2023 at
> > > 04:53:07PM -0400, Nicolas Pitre wrote:
> > > > On Mon, 20 Mar 2023, Richard Cochran wrote:
> > > > > On Mon, Mar 20, 2023 at 09:43:30AM -0400, Nicolas Pitre wrote:
> > > > >
> > > > > > Alternatively the above commit can be reverted if no one else
> > > > > > cares. I personally gave up on the idea of a slimmed down
> > > > > > Linux kernel a while ago.
> > > > >
> > > > > Does this mean I can restore the posix clocks back into the core
> > > > > unconditionally?
> > > >
> > > > This only means _I_ no longer care. I'm not speaking for others (e.g.
> > > > OpenWRT or the like) who might still rely on splitting it out.
> > > > Maybe Andy wants to "fix" it?
> > >
> > > I would be a good choice, if I have an access to the hardware at hand to test.
> > > That said, I think Richard himself can try to finish that feature
> > > (optional PTP) and on my side I can help with reviewing the code (just Cc me
> when needed).
> >
> > Handle NULL as a valid parameter (object) to their respective APIs
> > looks a good idea, but this will be a big change and need fully test
> > for all devices,
> 
> Since it's core change, so a few devices that cover 100% APIs that should handle
> optional PTP. I don't think it requires enormous work.
> 
> > we can add it on the TODO list.
> 
> It would be a good idea.
> 
> > So for this patch, are you agree we continue use the existing
> > ptp_clock_register() API, for example, change my driver like below:
> 
> The problem is that it will increase the technical debt.
> What about sending with NULL handled variant together with an RFC/RFT that
> finishes the optional PTP support?

I think sending the other patchset to fix this NULL handled issue of PTP core will be better?

> 
> >       dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
> 
> 	ret = PTR_ERR_OR_ZERO(...);
> 	if (ret)
> 		return ...
> 
> ?
> 
> >       if (IS_ERR_OR_NULL(dt->ptp_clock))
> >               return dev_err_probe(dt->dev, IS_ERR_OR_NULL(dt->ptp_clock),
> >                                    "Unable to register PTP clock\n");


Would you like below:

        dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
       ret = PTR_ERR_OR_ZERO(dt->ptp_clock);
        return dev_err_probe(dt->dev, ret, "Unable to register PTP clock\n");
Richard Cochran March 22, 2023, 4:17 a.m. UTC | #19
On Tue, Mar 21, 2023 at 02:52:34PM +0000, Zhang, Tianfei wrote:
> I think sending the other patchset to fix this NULL handled issue of PTP core will be better?

Yes, please just fix the driver to conform to the current API.

Thanks,
Richard
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index ec57c42ed544..584979abbd92 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15623,6 +15623,13 @@  L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/ptp/ptp_ocp.c
 
+INTEL PTP DFL ToD DRIVER
+M:	Tianfei Zhang <tianfei.zhang@intel.com>
+L:	linux-fpga@vger.kernel.org
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/ptp/ptp_dfl_tod.c
+
 OPENCORES I2C BUS DRIVER
 M:	Peter Korsgaard <peter@korsgaard.com>
 M:	Andrew Lunn <andrew@lunn.ch>
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index fe4971b65c64..e0d6f136ee46 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -186,4 +186,17 @@  config PTP_1588_CLOCK_OCP
 
 	  More information is available at http://www.timingcard.com/
 
+config PTP_DFL_TOD
+	tristate "FPGA DFL ToD Driver"
+	depends on FPGA_DFL
+	help
+	  The DFL (Device Feature List) device driver for the Intel ToD
+	  (Time-of-Day) device in FPGA card. The ToD IP within the FPGA
+	  is exposed as PTP Hardware Clock (PHC) device to the Linux PTP
+	  stack to synchronize the system clock to its ToD information
+	  using phc2sys utility of the Linux PTP stack.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp_dfl_tod.
+
 endmenu
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index 28a6fe342d3e..553f18bf3c83 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -18,3 +18,4 @@  obj-$(CONFIG_PTP_1588_CLOCK_IDTCM)	+= ptp_clockmatrix.o
 obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33)	+= ptp_idt82p33.o
 obj-$(CONFIG_PTP_1588_CLOCK_VMW)	+= ptp_vmw.o
 obj-$(CONFIG_PTP_1588_CLOCK_OCP)	+= ptp_ocp.o
+obj-$(CONFIG_PTP_DFL_TOD)		+= ptp_dfl_tod.o
diff --git a/drivers/ptp/ptp_dfl_tod.c b/drivers/ptp/ptp_dfl_tod.c
new file mode 100644
index 000000000000..d9fbdfc53bd8
--- /dev/null
+++ b/drivers/ptp/ptp_dfl_tod.c
@@ -0,0 +1,334 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DFL device driver for Time-of-Day (ToD) private feature
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/dfl.h>
+#include <linux/gcd.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/spinlock.h>
+#include <linux/units.h>
+
+#define FME_FEATURE_ID_TOD		0x22
+
+/* ToD clock register space. */
+#define TOD_CLK_FREQ			0x038
+
+/*
+ * The read sequence of ToD timestamp registers: TOD_NANOSEC, TOD_SECONDSL and
+ * TOD_SECONDSH, because there is a hardware snapshot whenever the TOD_NANOSEC
+ * register is read.
+ *
+ * The ToD IP requires writing registers in the reverse order to the read sequence.
+ * The timestamp is corrected when the TOD_NANOSEC register is written, so the
+ * sequence of write TOD registers: TOD_SECONDSH, TOD_SECONDSL and TOD_NANOSEC.
+ */
+#define TOD_SECONDSH			0x100
+#define TOD_SECONDSL			0x104
+#define TOD_NANOSEC			0x108
+#define TOD_PERIOD			0x110
+#define TOD_ADJUST_PERIOD		0x114
+#define TOD_ADJUST_COUNT		0x118
+#define TOD_DRIFT_ADJUST		0x11c
+#define TOD_DRIFT_ADJUST_RATE		0x120
+#define PERIOD_FRAC_OFFSET		16
+#define SECONDS_MSB			GENMASK_ULL(47, 32)
+#define SECONDS_LSB			GENMASK_ULL(31, 0)
+#define TOD_SECONDSH_SEC_MSB		GENMASK_ULL(15, 0)
+
+#define CAL_SECONDS(m, l)		((FIELD_GET(TOD_SECONDSH_SEC_MSB, (m)) << 32) | (l))
+
+#define TOD_PERIOD_MASK		GENMASK_ULL(19, 0)
+#define TOD_PERIOD_MAX			FIELD_MAX(TOD_PERIOD_MASK)
+#define TOD_PERIOD_MIN			0
+#define TOD_DRIFT_ADJUST_MASK		GENMASK_ULL(15, 0)
+#define TOD_DRIFT_ADJUST_FNS_MAX	FIELD_MAX(TOD_DRIFT_ADJUST_MASK)
+#define TOD_DRIFT_ADJUST_RATE_MAX	TOD_DRIFT_ADJUST_FNS_MAX
+#define TOD_ADJUST_COUNT_MASK		GENMASK_ULL(19, 0)
+#define TOD_ADJUST_COUNT_MAX		FIELD_MAX(TOD_ADJUST_COUNT_MASK)
+#define TOD_ADJUST_INTERVAL_US		1000
+#define TOD_ADJUST_MS			\
+		(((TOD_PERIOD_MAX >> 16) + 1) * (TOD_ADJUST_COUNT_MAX + 1))
+#define TOD_ADJUST_MS_MAX		(TOD_ADJUST_MS / MICRO)
+#define TOD_ADJUST_MAX_US		(TOD_ADJUST_MS_MAX * USEC_PER_MSEC)
+#define TOD_MAX_ADJ			(500 * MEGA)
+
+struct dfl_tod {
+	struct ptp_clock_info ptp_clock_ops;
+	struct device *dev;
+	struct ptp_clock *ptp_clock;
+
+	/* ToD Clock address space */
+	void __iomem *tod_ctrl;
+
+	/* ToD clock registers protection */
+	spinlock_t tod_lock;
+};
+
+/*
+ * A fine ToD HW clock offset adjustment. To perform the fine offset adjustment, the
+ * adjust_period and adjust_count argument are used to update the TOD_ADJUST_PERIOD
+ * and TOD_ADJUST_COUNT register for in hardware. The dt->tod_lock spinlock must be
+ * held when calling this function.
+ */
+static int fine_adjust_tod_clock(struct dfl_tod *dt, u32 adjust_period,
+				 u32 adjust_count)
+{
+	void __iomem *base = dt->tod_ctrl;
+	u32 val;
+
+	writel(adjust_period, base + TOD_ADJUST_PERIOD);
+	writel(adjust_count, base + TOD_ADJUST_COUNT);
+
+	/* Wait for present offset adjustment update to complete */
+	return readl_poll_timeout(base + TOD_ADJUST_COUNT, val, !val, TOD_ADJUST_INTERVAL_US,
+				  TOD_ADJUST_MAX_US);
+}
+
+/*
+ * A coarse ToD HW clock offset adjustment.
+ * The coarse time adjustment performs by adding or subtracting the delta value
+ * from the current ToD HW clock time.
+ */
+static int coarse_adjust_tod_clock(struct dfl_tod *dt, s64 delta)
+{
+	u32 seconds_msb, seconds_lsb, nanosec;
+	void __iomem *base = dt->tod_ctrl;
+	u64 seconds, now;
+
+	if (delta == 0)
+		return 0;
+
+	nanosec = readl(base + TOD_NANOSEC);
+	seconds_lsb = readl(base + TOD_SECONDSL);
+	seconds_msb = readl(base + TOD_SECONDSH);
+
+	/* Calculate new time */
+	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
+	now = seconds * NSEC_PER_SEC + nanosec + delta;
+
+	seconds = div_u64_rem(now, NSEC_PER_SEC, &nanosec);
+	seconds_msb = FIELD_GET(SECONDS_MSB, seconds);
+	seconds_lsb = FIELD_GET(SECONDS_LSB, seconds);
+
+	writel(seconds_msb, base + TOD_SECONDSH);
+	writel(seconds_lsb, base + TOD_SECONDSL);
+	writel(nanosec, base + TOD_NANOSEC);
+
+	return 0;
+}
+
+static int dfl_tod_adjust_fine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
+	u32 tod_period, tod_rem, tod_drift_adjust_fns, tod_drift_adjust_rate;
+	void __iomem *base = dt->tod_ctrl;
+	unsigned long flags, rate;
+	u64 ppb;
+
+	/* Get the clock rate from clock frequency register offset */
+	rate = readl(base + TOD_CLK_FREQ);
+
+	/* add GIGA as nominal ppb */
+	ppb = scaled_ppm_to_ppb(scaled_ppm) + GIGA;
+
+	tod_period = div_u64_rem(ppb << PERIOD_FRAC_OFFSET, rate, &tod_rem);
+	if (tod_period > TOD_PERIOD_MAX)
+		return -ERANGE;
+
+	/*
+	 * The drift of ToD adjusted periodically by adding a drift_adjust_fns
+	 * correction value every drift_adjust_rate count of clock cycles.
+	 */
+	tod_drift_adjust_fns = tod_rem / gcd(tod_rem, rate);
+	tod_drift_adjust_rate = rate / gcd(tod_rem, rate);
+
+	while ((tod_drift_adjust_fns > TOD_DRIFT_ADJUST_FNS_MAX) ||
+	       (tod_drift_adjust_rate > TOD_DRIFT_ADJUST_RATE_MAX)) {
+		tod_drift_adjust_fns >>= 1;
+		tod_drift_adjust_rate >>= 1;
+	}
+
+	if (tod_drift_adjust_fns == 0)
+		tod_drift_adjust_rate = 0;
+
+	spin_lock_irqsave(&dt->tod_lock, flags);
+	writel(tod_period, base + TOD_PERIOD);
+	writel(0, base + TOD_ADJUST_PERIOD);
+	writel(0, base + TOD_ADJUST_COUNT);
+	writel(tod_drift_adjust_fns, base + TOD_DRIFT_ADJUST);
+	writel(tod_drift_adjust_rate, base + TOD_DRIFT_ADJUST_RATE);
+	spin_unlock_irqrestore(&dt->tod_lock, flags);
+
+	return 0;
+}
+
+static int dfl_tod_adjust_time(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
+	u32 period, diff, rem, rem_period, adj_period;
+	void __iomem *base = dt->tod_ctrl;
+	unsigned long flags;
+	bool neg_adj;
+	u64 count;
+	int ret;
+
+	neg_adj = delta < 0;
+	if (neg_adj)
+		delta = -delta;
+
+	spin_lock_irqsave(&dt->tod_lock, flags);
+
+	/*
+	 * Get the maximum possible value of the Period register offset
+	 * adjustment in nanoseconds scale. This depends on the current
+	 * Period register setting and the maximum and minimum possible
+	 * values of the Period register.
+	 */
+	period = readl(base + TOD_PERIOD);
+
+	if (neg_adj) {
+		diff = (period - TOD_PERIOD_MIN) >> PERIOD_FRAC_OFFSET;
+		adj_period = period - (diff << PERIOD_FRAC_OFFSET);
+		rem_period = period - (rem << PERIOD_FRAC_OFFSET);
+	} else {
+		diff = (TOD_PERIOD_MAX - period) >> PERIOD_FRAC_OFFSET;
+		adj_period = period + (diff << PERIOD_FRAC_OFFSET);
+		rem_period = period + (rem << PERIOD_FRAC_OFFSET);
+	}
+
+	ret = 0;
+
+	/* Find the number of cycles required for the time adjustment */
+	count = div_u64_rem(delta, diff, &rem);
+
+	if (count > TOD_ADJUST_COUNT_MAX) {
+		ret = coarse_adjust_tod_clock(dt, delta);
+	} else {
+		/* Adjust the period by count cycles to adjust the time */
+		if (count)
+			ret = fine_adjust_tod_clock(dt, adj_period, count);
+
+		/* If there is a remainder, adjust the period for an additional cycle */
+		if (rem)
+			ret = fine_adjust_tod_clock(dt, rem_period, 1);
+	}
+
+	spin_unlock_irqrestore(&dt->tod_lock, flags);
+
+	return ret;
+}
+
+static int dfl_tod_get_timex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
+{
+	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
+	u32 seconds_msb, seconds_lsb, nanosec;
+	void __iomem *base = dt->tod_ctrl;
+	unsigned long flags;
+	u64 seconds;
+
+	spin_lock_irqsave(&dt->tod_lock, flags);
+	ptp_read_system_prets(sts);
+	nanosec = readl(base + TOD_NANOSEC);
+	seconds_lsb = readl(base + TOD_SECONDSL);
+	seconds_msb = readl(base + TOD_SECONDSH);
+	ptp_read_system_postts(sts);
+	spin_unlock_irqrestore(&dt->tod_lock, flags);
+
+	seconds = CAL_SECONDS(seconds_msb, seconds_lsb);
+
+	ts->tv_nsec = nanosec;
+	ts->tv_sec = seconds;
+
+	return 0;
+}
+
+static int dfl_tod_set_time(struct ptp_clock_info *ptp,
+			    const struct timespec64 *ts)
+{
+	struct dfl_tod *dt = container_of(ptp, struct dfl_tod, ptp_clock_ops);
+	u32 seconds_msb = FIELD_GET(SECONDS_MSB, ts->tv_sec);
+	u32 seconds_lsb = FIELD_GET(SECONDS_LSB, ts->tv_sec);
+	u32 nanosec = FIELD_GET(SECONDS_LSB, ts->tv_nsec);
+	void __iomem *base = dt->tod_ctrl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dt->tod_lock, flags);
+	writel(seconds_msb, base + TOD_SECONDSH);
+	writel(seconds_lsb, base + TOD_SECONDSL);
+	writel(nanosec, base + TOD_NANOSEC);
+	spin_unlock_irqrestore(&dt->tod_lock, flags);
+
+	return 0;
+}
+
+static struct ptp_clock_info dfl_tod_clock_ops = {
+	.owner = THIS_MODULE,
+	.name = "dfl_tod",
+	.max_adj = TOD_MAX_ADJ,
+	.adjfine = dfl_tod_adjust_fine,
+	.adjtime = dfl_tod_adjust_time,
+	.gettimex64 = dfl_tod_get_timex,
+	.settime64 = dfl_tod_set_time,
+};
+
+static int dfl_tod_probe(struct dfl_device *ddev)
+{
+	struct device *dev = &ddev->dev;
+	struct dfl_tod *dt;
+
+	dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
+	if (!dt)
+		return -ENOMEM;
+
+	dt->tod_ctrl = devm_ioremap_resource(dev, &ddev->mmio_res);
+	if (IS_ERR(dt->tod_ctrl))
+		return PTR_ERR(dt->tod_ctrl);
+
+	dt->dev = dev;
+	spin_lock_init(&dt->tod_lock);
+	dev_set_drvdata(dev, dt);
+
+	dt->ptp_clock_ops = dfl_tod_clock_ops;
+
+	dt->ptp_clock = ptp_clock_register(&dt->ptp_clock_ops, dev);
+	if (IS_ERR(dt->ptp_clock))
+		return dev_err_probe(dt->dev, PTR_ERR(dt->ptp_clock),
+				     "Unable to register PTP clock\n");
+
+	return 0;
+}
+
+static void dfl_tod_remove(struct dfl_device *ddev)
+{
+	struct dfl_tod *dt = dev_get_drvdata(&ddev->dev);
+
+	ptp_clock_unregister(dt->ptp_clock);
+}
+
+static const struct dfl_device_id dfl_tod_ids[] = {
+	{ FME_ID, FME_FEATURE_ID_TOD },
+	{ }
+};
+MODULE_DEVICE_TABLE(dfl, dfl_tod_ids);
+
+static struct dfl_driver dfl_tod_driver = {
+	.drv = {
+		.name = "dfl-tod",
+	},
+	.id_table = dfl_tod_ids,
+	.probe = dfl_tod_probe,
+	.remove = dfl_tod_remove,
+};
+module_dfl_driver(dfl_tod_driver);
+
+MODULE_DESCRIPTION("FPGA DFL ToD driver");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");