diff mbox series

[net-next,RFC,v4,2/5] net: Expose available time stamping layers to user space.

Message ID 20230406173308.401924-3-kory.maincent@bootlin.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net: Make MAC/PHY time stamping selectable | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2616 this patch: 2616
netdev/cc_maintainers warning 8 maintainers not CCed: pabeni@redhat.com corbet@lwn.net linux-doc@vger.kernel.org willemdebruijn.kernel@gmail.com edumazet@google.com andrew@lunn.ch linux@rempel-privat.de davem@davemloft.net
netdev/build_clang success Errors and warnings before: 541 this patch: 541
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2758 this patch: 2758
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: spaces preferred around that '<<' (ctx:VxV) WARNING: Missing a blank line after declarations WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kory Maincent April 6, 2023, 5:33 p.m. UTC
From: Kory Maincent <kory.maincent@bootlin.com>

Time stamping on network packets may happen either in the MAC or in
the PHY, but not both.  In preparation for making the choice
selectable, expose both the current and available layers via ethtool.

In accordance with the kernel implementation as it stands, the current
layer will always read as "phy" when a PHY time stamping device is
present.  Future patches will allow changing the current layer
administratively.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
---

Notes:
    Changes in v2:
    - Move the introduction of selected_timestamping_layer variable in next
      patch.
    
    Changes in v3:
    - Move on to ethtool instead of syfs
    
    Changes in v4:
    - Move on to netlink ethtool instead of ioctl. I am not familiar with
      netlink so there might be some code that does not follow the good code
      practice.

 Documentation/networking/ethtool-netlink.rst |  40 +++++++
 include/uapi/linux/ethtool_netlink.h         |  15 +++
 include/uapi/linux/net_tstamp.h              |   8 ++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/netlink.c                        |  20 ++++
 net/ethtool/netlink.h                        |   3 +
 net/ethtool/ts.c                             | 114 +++++++++++++++++++
 7 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 net/ethtool/ts.c

Comments

Jakub Kicinski April 7, 2023, 1:46 a.m. UTC | #1
On Thu,  6 Apr 2023 19:33:05 +0200 Köry Maincent wrote:
> +Gets transceiver module parameters.
> +
> +Request contents:
> +
> +  =================================  ======  ==========================
> +  ``ETHTOOL_A_TS_HEADER``            nested  request header
> +  =================================  ======  ==========================
> +
> +Kernel response contents:
> +
> +  =======================  ======  ====================================
> +  ``ETHTOOL_A_TS_HEADER``  nested  reply header
> +  ``ETHTOOL_A_TS_LAYER``   u32     current hardware timestamping
> +  =======================  ======  ====================================
> +
> +TSLIST_GET
> +==========
> +
> +Gets transceiver module parameters.
> +
> +Request contents:
> +
> +  =================================  ======  ==========================
> +  ``ETHTOOL_A_TS_HEADER``            nested  request header
> +  =================================  ======  ==========================
> +
> +Kernel response contents:
> +
> +  =======================  ======  ===================================
> +  ``ETHTOOL_A_TS_HEADER``  nested  reply header
> +  ``ETHTOOL_A_TS_LAYER``   u32     available hardware timestamping
> +  =======================  ======  ===================================

Please put some words in here :) Documentation matters to the users.

Also let me advertise tools/net/ynl/cli.py to you:
https://docs.kernel.org/next/userspace-api/netlink/intro-specs.html

Please fill in the new commands in the ethtool spec. Feel free to ask
if you have any questions, it's still a bit of a rough.. clay?

> +/* Hardware layer of the SO_TIMESTAMPING provider */
> +enum timestamping_layer {
> +	SOF_MAC_TIMESTAMPING = (1<<0),
> +	SOF_PHY_TIMESTAMPING = (1<<1),

What does SOF_ stand for? 
Kory Maincent April 7, 2023, 8:58 a.m. UTC | #2
Thanks for the review.

On Thu, 6 Apr 2023 18:46:46 -0700
Jakub Kicinski <kuba@kernel.org> wrote:
 
> Please put some words in here :) Documentation matters to the users.

Ok.

> 
> Also let me advertise tools/net/ynl/cli.py to you:
> https://docs.kernel.org/next/userspace-api/netlink/intro-specs.html

Ok I will take look.
Seems broken on net-next:
./tools/net/ynl/cli.py --spec Documentation/netlink/specs/ethtool.yaml --do rings-get --json '{"header":{"dev-index": 18}}'
Traceback (most recent call last):
  File "./tools/net/ynl/cli.py", line 52, in <module>
    main()
  File "./tools/net/ynl/cli.py", line 31, in main
    ynl = YnlFamily(args.spec, args.schema)
  File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 361, in __init__
    self.family = GenlFamily(self.yaml['name'])
  File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 331, in __init__
    self.genl_family = genl_family_name_to_id[family_name]
KeyError: 'ethtool'


> Please fill in the new commands in the ethtool spec. Feel free to ask
> if you have any questions, it's still a bit of a rough.. clay?
> 
> > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > +enum timestamping_layer {
> > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > +	SOF_PHY_TIMESTAMPING = (1<<1),  
> 
> What does SOF_ stand for? 
Jakub Kicinski April 7, 2023, 2:26 p.m. UTC | #3
On Fri, 7 Apr 2023 10:58:57 +0200 Köry Maincent wrote:
> > Also let me advertise tools/net/ynl/cli.py to you:
> > https://docs.kernel.org/next/userspace-api/netlink/intro-specs.html  
> 
> Ok I will take look.
> Seems broken on net-next:
> ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/ethtool.yaml --do rings-get --json '{"header":{"dev-index": 18}}'
> Traceback (most recent call last):
>   File "./tools/net/ynl/cli.py", line 52, in <module>
>     main()
>   File "./tools/net/ynl/cli.py", line 31, in main
>     ynl = YnlFamily(args.spec, args.schema)
>   File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 361, in __init__
>     self.family = GenlFamily(self.yaml['name'])
>   File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 331, in __init__
>     self.genl_family = genl_family_name_to_id[family_name]
> KeyError: 'ethtool'

IIRC this usually means ethtool netlink is not selected by you Kconfig.
I should add a clearer error for that I guess.
Booting net-next now, I'll get back to you with a confirmation.

> > Please fill in the new commands in the ethtool spec. Feel free to ask
> > if you have any questions, it's still a bit of a rough.. clay?
> >   
> > > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > > +enum timestamping_layer {
> > > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > > +	SOF_PHY_TIMESTAMPING = (1<<1),    
> > 
> > What does SOF_ stand for? 
Jakub Kicinski April 7, 2023, 2:44 p.m. UTC | #4
On Fri, 7 Apr 2023 07:26:15 -0700 Jakub Kicinski wrote:
> > Ok I will take look.
> > Seems broken on net-next:
> > ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/ethtool.yaml --do rings-get --json '{"header":{"dev-index": 18}}'
> > Traceback (most recent call last):
> >   File "./tools/net/ynl/cli.py", line 52, in <module>
> >     main()
> >   File "./tools/net/ynl/cli.py", line 31, in main
> >     ynl = YnlFamily(args.spec, args.schema)
> >   File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 361, in __init__
> >     self.family = GenlFamily(self.yaml['name'])
> >   File "/home/kmaincent/Documents/linux/tools/net/ynl/lib/ynl.py", line 331, in __init__
> >     self.genl_family = genl_family_name_to_id[family_name]
> > KeyError: 'ethtool'  
> 
> IIRC this usually means ethtool netlink is not selected by you Kconfig.
> I should add a clearer error for that I guess.
> Booting net-next now, I'll get back to you with a confirmation.

Yeah, works here. FWIW if you want to use it on the VM / remote host
you just need to copy over a couple of dirs:

$ scp -r tools/net/ynl/ $bla:~/
$ scp -r Documentation/netlink/ $bla:~/
$ ssh $bla
bla$ dnf install python-yaml
bla$ ./ynl/cli.py \
	--no-schema \
	--spec netlink/specs/ethtool.yaml \
	--do rings-get \
	--json '{"header":{"dev-index": 2}}'
Richard Cochran April 8, 2023, 2:06 p.m. UTC | #5
On Thu, Apr 06, 2023 at 07:33:05PM +0200, Köry Maincent wrote:
> Signed-off-by: Richard Cochran <richardcochran@gmail.com>
> Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>

Since this patch is now completely re-written, you can drop my Sob and
authorship, keeping all the credit for yourself!

Thanks,
Richard
Michael Walle April 12, 2023, 10:50 a.m. UTC | #6
>> +/* Hardware layer of the SO_TIMESTAMPING provider */
>> +enum timestamping_layer {
>> +	SOF_MAC_TIMESTAMPING =3D (1<<0),
>> +	SOF_PHY_TIMESTAMPING =3D (1<<1),
>
> What does SOF_ stand for?

I'd guess start of frame. The timestamp will be taken at the
beginning of the frame.

-michael
Vladimir Oltean April 12, 2023, 11:08 a.m. UTC | #7
On Wed, Apr 12, 2023 at 12:50:34PM +0200, Michael Walle wrote:
> >> +/* Hardware layer of the SO_TIMESTAMPING provider */
> >> +enum timestamping_layer {
> >> +	SOF_MAC_TIMESTAMPING =3D (1<<0),
> >> +	SOF_PHY_TIMESTAMPING =3D (1<<1),
> >
> > What does SOF_ stand for?
> 
> I'd guess start of frame. The timestamp will be taken at the
> beginning of the frame.

I would suggest (with all due respect) that it was an inapt adaptation
of the Socket Option Flags that can be seen in
Documentation/networking/timestamping.rst.

These are not socket option flags (because these settings are not per
socket), so the namespace/prefix is not really correctly used here.
Michael Walle April 12, 2023, 11:12 a.m. UTC | #8
Am 2023-04-12 13:08, schrieb Vladimir Oltean:
> On Wed, Apr 12, 2023 at 12:50:34PM +0200, Michael Walle wrote:
>> >> +/* Hardware layer of the SO_TIMESTAMPING provider */
>> >> +enum timestamping_layer {
>> >> +	SOF_MAC_TIMESTAMPING =3D (1<<0),
>> >> +	SOF_PHY_TIMESTAMPING =3D (1<<1),
>> >
>> > What does SOF_ stand for?
>> 
>> I'd guess start of frame. The timestamp will be taken at the
>> beginning of the frame.
> 
> I would suggest (with all due respect) that it was an inapt adaptation
> of the Socket Option Flags that can be seen in
> Documentation/networking/timestamping.rst.

Agreed. Noticing the other SOF_TIMESTAMPING_* names, your
suggestion makes way more sense. Sorry for the noise.

-michael
Kory Maincent April 12, 2023, 12:19 p.m. UTC | #9
On Wed, 12 Apr 2023 14:08:40 +0300
Vladimir Oltean <vladimir.oltean@nxp.com> wrote:

> On Wed, Apr 12, 2023 at 12:50:34PM +0200, Michael Walle wrote:
> > >> +/* Hardware layer of the SO_TIMESTAMPING provider */
> > >> +enum timestamping_layer {
> > >> +	SOF_MAC_TIMESTAMPING =3D (1<<0),
> > >> +	SOF_PHY_TIMESTAMPING =3D (1<<1),  
> > >
> > > What does SOF_ stand for?  
> > 
> > I'd guess start of frame. The timestamp will be taken at the
> > beginning of the frame.  
> 
> I would suggest (with all due respect) that it was an inapt adaptation
> of the Socket Option Flags that can be seen in
> Documentation/networking/timestamping.rst.
> 
> These are not socket option flags (because these settings are not per
> socket), so the namespace/prefix is not really correctly used here.

As Jakub said, who knows maybe one day it will be per socket information,
but indeed for now it is not the case.
I will stick to whatever name the community prefer.
Vladimir Oltean May 11, 2023, 8:36 p.m. UTC | #10
On Thu, Apr 06, 2023 at 06:46:46PM -0700, Jakub Kicinski wrote:
> > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > +enum timestamping_layer {
> > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > +	SOF_PHY_TIMESTAMPING = (1<<1),
> 
> We need a value for DMA timestamps here.

What's a DMA timestamp, technically? Is it a snapshot of the PHC's time
domain, just not at the MII pins?

Which drivers provide DMA timestamps, and how do they currently signal
that they do this? Do they do this for all packets that get timestamped,
or for "some"?
Andrew Lunn May 11, 2023, 8:50 p.m. UTC | #11
On Thu, May 11, 2023 at 11:36:46PM +0300, Vladimir Oltean wrote:
> On Thu, Apr 06, 2023 at 06:46:46PM -0700, Jakub Kicinski wrote:
> > > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > > +enum timestamping_layer {
> > > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > > +	SOF_PHY_TIMESTAMPING = (1<<1),
> > 
> > We need a value for DMA timestamps here.
> 
> What's a DMA timestamp, technically? Is it a snapshot of the PHC's time
> domain, just not at the MII pins?

I also wounder if there is one definition of DMA timestampting, or
multiple. It could simply be that the time stamp is in the DMA
descriptor, but the sample of the PHC was taken as the SOF was
received. It could be the sample of the PHC at the point the DMA
descriptor was handed back to the host. It could be the PHC was
sampled when the host reads the descriptor, etc.

I also wounder if there is sufficient documentation to be able to tell
them apart for devices which support it. So maybe it does not even
matter what the exact definition is?

	Andrew
Russell King (Oracle) May 11, 2023, 8:55 p.m. UTC | #12
On Thu, May 11, 2023 at 10:50:30PM +0200, Andrew Lunn wrote:
> On Thu, May 11, 2023 at 11:36:46PM +0300, Vladimir Oltean wrote:
> > On Thu, Apr 06, 2023 at 06:46:46PM -0700, Jakub Kicinski wrote:
> > > > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > > > +enum timestamping_layer {
> > > > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > > > +	SOF_PHY_TIMESTAMPING = (1<<1),
> > > 
> > > We need a value for DMA timestamps here.
> > 
> > What's a DMA timestamp, technically? Is it a snapshot of the PHC's time
> > domain, just not at the MII pins?
> 
> I also wounder if there is one definition of DMA timestampting, or
> multiple. It could simply be that the time stamp is in the DMA
> descriptor,

Surely that is equivalent to MAC timestamping? Whether the MAC
places it in a DMA descriptor, or whether it places it in some
auxiliary information along with the packet is surely irrelevant,
because the MAC has to have the timestamp available to it in some
manner. Where it ends up is just a function of implementation surely?

I'm just wondering what this would mean for mvpp2, where the
timestamps are in the descriptors. If we have a "DMA timestamp"
is that a "DMA timestamp" or a "MAC timestamp"? The timestamp comes
from the MAC in this case.
Vladimir Oltean May 11, 2023, 9:02 p.m. UTC | #13
On Thu, May 11, 2023 at 09:55:39PM +0100, Russell King (Oracle) wrote:
> On Thu, May 11, 2023 at 10:50:30PM +0200, Andrew Lunn wrote:
> > On Thu, May 11, 2023 at 11:36:46PM +0300, Vladimir Oltean wrote:
> > > On Thu, Apr 06, 2023 at 06:46:46PM -0700, Jakub Kicinski wrote:
> > > > > +/* Hardware layer of the SO_TIMESTAMPING provider */
> > > > > +enum timestamping_layer {
> > > > > +	SOF_MAC_TIMESTAMPING = (1<<0),
> > > > > +	SOF_PHY_TIMESTAMPING = (1<<1),
> > > > 
> > > > We need a value for DMA timestamps here.
> > > 
> > > What's a DMA timestamp, technically? Is it a snapshot of the PHC's time
> > > domain, just not at the MII pins?
> > 
> > I also wounder if there is one definition of DMA timestampting, or
> > multiple. It could simply be that the time stamp is in the DMA
> > descriptor,
> 
> Surely that is equivalent to MAC timestamping? Whether the MAC
> places it in a DMA descriptor, or whether it places it in some
> auxiliary information along with the packet is surely irrelevant,
> because the MAC has to have the timestamp available to it in some
> manner. Where it ends up is just a function of implementation surely?
> 
> I'm just wondering what this would mean for mvpp2, where the
> timestamps are in the descriptors. If we have a "DMA timestamp"
> is that a "DMA timestamp" or a "MAC timestamp"? The timestamp comes
> from the MAC in this case.

No, a MAC timestamp carried through a DMA descriptor is still a MAC
timestamp (better said: timestamp taken at the MAC).

DMA timestamps probably have to do with this igc patch set, which I
admit to not having had the patience to follow along all the way and
understand what is its status and if it was ever accepted in that form,
or a different form, or if Vinicius' work for multiple in-flight TX
timestamps is an alternate solution to the same problem as DMA
timestamps, or whatever:
https://lore.kernel.org/netdev/20221018010733.4765-1-muhammad.husaini.zulkifli@intel.com/

So I just asked.
Jakub Kicinski May 11, 2023, 10:09 p.m. UTC | #14
On Fri, 12 May 2023 00:02:37 +0300 Vladimir Oltean wrote:
> > Surely that is equivalent to MAC timestamping? Whether the MAC
> > places it in a DMA descriptor, or whether it places it in some
> > auxiliary information along with the packet is surely irrelevant,
> > because the MAC has to have the timestamp available to it in some
> > manner. Where it ends up is just a function of implementation surely?
> > 
> > I'm just wondering what this would mean for mvpp2, where the
> > timestamps are in the descriptors. If we have a "DMA timestamp"
> > is that a "DMA timestamp" or a "MAC timestamp"? The timestamp comes
> > from the MAC in this case.  
> 
> No, a MAC timestamp carried through a DMA descriptor is still a MAC
> timestamp (better said: timestamp taken at the MAC).

Right. The method of communicating the TS and where TS is taken 
are theoretically unrelated (in practice DMA timestamp not in
a descriptor is likely pointless).

> DMA timestamps probably have to do with this igc patch set, which I
> admit to not having had the patience to follow along all the way and
> understand what is its status and if it was ever accepted in that form,
> or a different form, or if Vinicius' work for multiple in-flight TX

Exactly, think Tx. For Rx hauling the TS in metadata from PHY/MAC to
descriptor is easy. For Tx device will buffer the packet so the DMA
completion happens before the packet actually left onto the wire.

Which is not to say that some devices may not generate the Rx timestamp
when the packet is DMA'ed out of laziness, too.

> timestamps is an alternate solution to the same problem as DMA
> timestamps, or whatever:
> https://lore.kernel.org/netdev/20221018010733.4765-1-muhammad.husaini.zulkifli@intel.com/

What I was thinking was:

 - PHY - per spec, at the RS layer
 - MAC - "close to the wire" in the MAC, specifically the pipeline
   delay (PHY stamp vs MAC stamp) should be constant for all packets;
   there must be no variable-time buffering and (for Tx) the time
   stamping must be past the stage of the pipeline affected by pause
   frames
 - DMA - worst quality, variable delay timestamp, usually taken when
   packets DMA descriptors (Rx or completion) are being written

With these definitions MAC and PHY timestamps are pretty similar
from the perspective of accuracy.
Vladimir Oltean May 11, 2023, 11:07 p.m. UTC | #15
On Thu, May 11, 2023 at 03:09:02PM -0700, Jakub Kicinski wrote:
> Exactly, think Tx. For Rx hauling the TS in metadata from PHY/MAC to
> descriptor is easy. For Tx device will buffer the packet so the DMA
> completion happens before the packet actually left onto the wire.
> 
> Which is not to say that some devices may not generate the Rx timestamp
> when the packet is DMA'ed out of laziness, too.
> 
> > timestamps is an alternate solution to the same problem as DMA
> > timestamps, or whatever:
> > https://lore.kernel.org/netdev/20221018010733.4765-1-muhammad.husaini.zulkifli@intel.com/
> 
> What I was thinking was:
> 
>  - PHY - per spec, at the RS layer
>  - MAC - "close to the wire" in the MAC, specifically the pipeline
>    delay (PHY stamp vs MAC stamp) should be constant for all packets;
>    there must be no variable-time buffering and (for Tx) the time
>    stamping must be past the stage of the pipeline affected by pause
>    frames
>  - DMA - worst quality, variable delay timestamp, usually taken when
>    packets DMA descriptors (Rx or completion) are being written
> 
> With these definitions MAC and PHY timestamps are pretty similar
> from the perspective of accuracy.

So if I add a call to ptp_clock_info :: gettimex64() where the
skb_tx_timestamp() call is located in a driver, could that pass as
a DMA timestamp?

The question is how much do we want to encourage these DMA timestamps:
enough to make them a first-class citizen in the UAPI? Are users even
happy with their existence?

I mean, I can't ignore the fact that there are NICs that can provide
2-step TX timestamps at line rate for all packets (not just PTP) for
line rates exceeding 10G, in band with the descriptor in its TX
completion queue. I don't want to give any names, just to point out
that there isn't any inherent limitation in the technology. AFAIU from
igc_ptp_tx_hwtstamp(), it's just that the igc DMA controller did not
bother to transport the timestamps from the MAC back into the
descriptor, leaving it up to software to do it out of band, which of
course may cause correlation bugs and limits throughput. Surely they
can do better.
Jakub Kicinski May 11, 2023, 11:16 p.m. UTC | #16
On Fri, 12 May 2023 02:07:17 +0300 Vladimir Oltean wrote:
> On Thu, May 11, 2023 at 03:09:02PM -0700, Jakub Kicinski wrote:
> > Exactly, think Tx. For Rx hauling the TS in metadata from PHY/MAC to
> > descriptor is easy. For Tx device will buffer the packet so the DMA
> > completion happens before the packet actually left onto the wire.
> > 
> > Which is not to say that some devices may not generate the Rx timestamp
> > when the packet is DMA'ed out of laziness, too.
> >   
> > > timestamps is an alternate solution to the same problem as DMA
> > > timestamps, or whatever:
> > > https://lore.kernel.org/netdev/20221018010733.4765-1-muhammad.husaini.zulkifli@intel.com/  
> > 
> > What I was thinking was:
> > 
> >  - PHY - per spec, at the RS layer
> >  - MAC - "close to the wire" in the MAC, specifically the pipeline
> >    delay (PHY stamp vs MAC stamp) should be constant for all packets;
> >    there must be no variable-time buffering and (for Tx) the time
> >    stamping must be past the stage of the pipeline affected by pause
> >    frames
> >  - DMA - worst quality, variable delay timestamp, usually taken when
> >    packets DMA descriptors (Rx or completion) are being written
> > 
> > With these definitions MAC and PHY timestamps are pretty similar
> > from the perspective of accuracy.  
> 
> So if I add a call to ptp_clock_info :: gettimex64() where the
> skb_tx_timestamp() call is located in a driver, could that pass as
> a DMA timestamp?

Yes.

> The question is how much do we want to encourage these DMA timestamps:
> enough to make them a first-class citizen in the UAPI? Are users even
> happy with their existence?

I don't want to call anyone out, but multiple high speed devices
with the current, in tree drivers report DMA timestamps when you 
ask for HW timestamps. DMA stamps are still 2 orders of magnitude
better than SW stamping.

> I mean, I can't ignore the fact that there are NICs that can provide
> 2-step TX timestamps at line rate for all packets (not just PTP) for
> line rates exceeding 10G, in band with the descriptor in its TX
> completion queue. I don't want to give any names, just to point out
> that there isn't any inherent limitation in the technology.

Oh, you should tell me, maybe off-list then. 'Cause I don't know any.

> AFAIU from igc_ptp_tx_hwtstamp(), it's just that the igc DMA
> controller did not bother to transport the timestamps from the MAC
> back into the descriptor, leaving it up to software to do it out of
> band, which of course may cause correlation bugs and limits
> throughput. Surely they can do better.

It's not that simple. Any packet loss or diversion and you may end up
missing a completion.
Vladimir Oltean May 12, 2023, 10:29 a.m. UTC | #17
On Thu, May 11, 2023 at 04:16:25PM -0700, Jakub Kicinski wrote:
> > I mean, I can't ignore the fact that there are NICs that can provide
> > 2-step TX timestamps at line rate for all packets (not just PTP) for
> > line rates exceeding 10G, in band with the descriptor in its TX
> > completion queue. I don't want to give any names, just to point out
> > that there isn't any inherent limitation in the technology.
> 
> Oh, you should tell me, maybe off-list then. 'Cause I don't know any.

I hope the examples given in private will make you reconsider the
validity of my argument about DMA timestamps.
Jakub Kicinski May 12, 2023, 5:38 p.m. UTC | #18
On Fri, 12 May 2023 13:29:11 +0300 Vladimir Oltean wrote:
> On Thu, May 11, 2023 at 04:16:25PM -0700, Jakub Kicinski wrote:
> > > I mean, I can't ignore the fact that there are NICs that can provide
> > > 2-step TX timestamps at line rate for all packets (not just PTP) for
> > > line rates exceeding 10G, in band with the descriptor in its TX
> > > completion queue. I don't want to give any names, just to point out
> > > that there isn't any inherent limitation in the technology.  
> > 
> > Oh, you should tell me, maybe off-list then. 'Cause I don't know any.  
> 
> I hope the examples given in private will make you reconsider the
> validity of my argument about DMA timestamps.

I may have lost track of what the argument is. There are devices
which will provide a DMA stamp for Tx and Rx. We need an API that'll
inform the user about it. 

To be clear I'm talking about drivers which are already in the tree,
not opening the door for some shoddy new HW in.
Jakub Kicinski May 17, 2023, 7:19 p.m. UTC | #19
On Fri, 12 May 2023 10:38:52 -0700 Jakub Kicinski wrote:
> On Fri, 12 May 2023 13:29:11 +0300 Vladimir Oltean wrote:
> > On Thu, May 11, 2023 at 04:16:25PM -0700, Jakub Kicinski wrote:  
> > > Oh, you should tell me, maybe off-list then. 'Cause I don't know any.    
> > 
> > I hope the examples given in private will make you reconsider the
> > validity of my argument about DMA timestamps.  
> 
> I may have lost track of what the argument is. There are devices
> which will provide a DMA stamp for Tx and Rx. We need an API that'll
> inform the user about it. 
> 
> To be clear I'm talking about drivers which are already in the tree,
> not opening the door for some shoddy new HW in.

It dawned on me while reading a phylink discussion that I may have
misunderstood the meaning of the MAC vs PHY time stamp sources.
By the standard - stamping happens under the MAC, so MAC is 
the "right" place to stamp, not the PHY. And there can be multiple 
PHYs technically? Are we just using the MAC vs PHY thing as an
implementation aid, to know which driver to send the request to?

Shouldn't we use the clock ID instead?
Andrew Lunn May 17, 2023, 7:46 p.m. UTC | #20
On Wed, May 17, 2023 at 12:19:25PM -0700, Jakub Kicinski wrote:
> On Fri, 12 May 2023 10:38:52 -0700 Jakub Kicinski wrote:
> > On Fri, 12 May 2023 13:29:11 +0300 Vladimir Oltean wrote:
> > > On Thu, May 11, 2023 at 04:16:25PM -0700, Jakub Kicinski wrote:  
> > > > Oh, you should tell me, maybe off-list then. 'Cause I don't know any.    
> > > 
> > > I hope the examples given in private will make you reconsider the
> > > validity of my argument about DMA timestamps.  
> > 
> > I may have lost track of what the argument is. There are devices
> > which will provide a DMA stamp for Tx and Rx. We need an API that'll
> > inform the user about it. 
> > 
> > To be clear I'm talking about drivers which are already in the tree,
> > not opening the door for some shoddy new HW in.
> 
> It dawned on me while reading a phylink discussion that I may have
> misunderstood the meaning of the MAC vs PHY time stamp sources.
> By the standard - stamping happens under the MAC, so MAC is 
> the "right" place to stamp, not the PHY. And there can be multiple 
> PHYs technically? Are we just using the MAC vs PHY thing as an
> implementation aid, to know which driver to send the request to?
> 
> Shouldn't we use the clock ID instead?

As i said in an earlier thread, with a bit of a stretch, there could
be 7 places to take time stamps in the system. We need some sort of
identifier to indicate which of these stampers to use.

Is clock ID unique? In a switch, i think there could be multiple
stampers, one per MAC port, sharing one clock? So you actually need
more than a clock ID.

Also, 'By the standard - stamping happens under the MAC'. Which MAC?
There can be multple MAC's in the pipeline. MACSEC and rate adaptation
in the PHY are often implemented by the PHY having a MAC
reconstituting the frame from the bitstream and putting it into a
queue. Rate adaptation can then be performed by the PHY by sending
pause frames to the 'primary' MAC to slow it down. MACSEC in the PHY
takes frames in the queues and if they match a filter they get
encrypted. The PHY then takes the frame out of the queue and passes
them to a second MAC in the PHY which creates a bitstream and then to
a 'PHY' to generate signals for the line.

In this sort of setup, you obviously don't want the 'primary' MAC
doing the stamping. You want the MAC nearest to the line, or better
still the 'PHY' within the PHY just before the line.

      Andrew
Jacob Keller May 17, 2023, 7:58 p.m. UTC | #21
On 4/7/2023 1:58 AM, Köry Maincent wrote:
> On Thu, 6 Apr 2023 18:46:46 -0700
>> What does SOF_ stand for? 
Jakub Kicinski May 17, 2023, 8:07 p.m. UTC | #22
On Wed, 17 May 2023 21:46:43 +0200 Andrew Lunn wrote:
> As i said in an earlier thread, with a bit of a stretch, there could
> be 7 places to take time stamps in the system. We need some sort of
> identifier to indicate which of these stampers to use.
> 
> Is clock ID unique? In a switch, i think there could be multiple
> stampers, one per MAC port, sharing one clock? So you actually need
> more than a clock ID.

Clock ID is a bit vague too, granted, but in practice clock ID should
correspond to the driver fairly well? My thinking was - use clock ID
to select the (silicon) device, use a different attribute to select 
the stamping point.

IOW try to use the existing attribute before inventing a new one.

> Also, 'By the standard - stamping happens under the MAC'. Which MAC?
> There can be multple MAC's in the pipeline. MACSEC and rate adaptation
> in the PHY are often implemented by the PHY having a MAC
> reconstituting the frame from the bitstream and putting it into a
> queue. Rate adaptation can then be performed by the PHY by sending
> pause frames to the 'primary' MAC to slow it down. MACSEC in the PHY
> takes frames in the queues and if they match a filter they get
> encrypted. The PHY then takes the frame out of the queue and passes
> them to a second MAC in the PHY which creates a bitstream and then to
> a 'PHY' to generate signals for the line.
> 
> In this sort of setup, you obviously don't want the 'primary' MAC
> doing the stamping. You want the MAC nearest to the line, or better
> still the 'PHY' within the PHY just before the line.

For a PHY "always use the point closest to the wire" makes sense.

For DMA we'd have a different set of priorities - precision vs
volume vs 1-step / 2-step; but all from the same clock. I think 
that we may want to defer figuring out selection within a single
clock for now, to make some progress.
Jacob Keller May 17, 2023, 10:01 p.m. UTC | #23
On 5/11/2023 4:16 PM, Jakub Kicinski wrote:
> On Fri, 12 May 2023 02:07:17 +0300 Vladimir Oltean wrote:
>> AFAIU from igc_ptp_tx_hwtstamp(), it's just that the igc DMA
>> controller did not bother to transport the timestamps from the MAC
>> back into the descriptor, leaving it up to software to do it out of
>> band, which of course may cause correlation bugs and limits
>> throughput. Surely they can do better.
> 

In myy understanding for for MAC timestamping, the igc hardware stores
the timestamp in a register and assumes you only have one outstanding
timestamp request at a time.

The MAC timestamp is captured well after the Tx descriptor is written
back as complete. There's no completion queue on this device to send
such a message out-of-band of the Tx writeback. Delaying the writeback
until after the Tx timestamp is captured would have its own challenges.

Obviously it *can* be done better than this, but thats not how the igc
hardware was designed.
Jacob Keller May 17, 2023, 10:13 p.m. UTC | #24
On 4/6/2023 6:46 PM, Jakub Kicinski wrote:
> On Thu,  6 Apr 2023 19:33:05 +0200 Köry Maincent wrote:
>> +/* TSLIST_GET */
>> +static int tslist_prepare_data(const struct ethnl_req_info *req_base,
>> +			       struct ethnl_reply_data *reply_base,
>> +			       struct genl_info *info)
>> +{
>> +	struct ts_reply_data *data = TS_REPDATA(reply_base);
>> +	struct net_device *dev = reply_base->dev;
>> +	const struct ethtool_ops *ops = dev->ethtool_ops;
>> +	int ret;
>> +
>> +	ret = ethnl_ops_begin(dev);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	data->ts = 0;
>> +	if (phy_has_tsinfo(dev->phydev))
>> +		data->ts = SOF_PHY_TIMESTAMPING;
>> +	if (ops->get_ts_info)
>> +		data->ts |= SOF_MAC_TIMESTAMPING;
> 
> We can't make that assumption, that info must come from the driver.
> 
> Also don't we need some way to identify the device / phc from which 
> the timestamp at the given layer will come?

For example, ice hardware captures timestamp data in its internal PHY
well after the MAC layer finishes, but it doesn't expose the PHY to the
host at all..

From a timing perspective it matters that its PHY, but from an
implementation perspective there's not much difference since we don't
support MAC timestamping at all (and the PHY isn't accessible through
phylink...)

Perhaps that doesn't fit the use cace here though where the issue is
with supporting both MAC and PHY but no way to configure that
Richard Cochran May 17, 2023, 10:46 p.m. UTC | #25
On Wed, May 17, 2023 at 03:13:06PM -0700, Jacob Keller wrote:

> For example, ice hardware captures timestamp data in its internal PHY
> well after the MAC layer finishes, but it doesn't expose the PHY to the
> host at all..
> 
> From a timing perspective it matters that its PHY, but from an
> implementation perspective there's not much difference since we don't
> support MAC timestamping at all (and the PHY isn't accessible through
> phylink...)

Here is a crazy idea:  Wouldn't it be nice to have all PHYs represented
in the kernel driver world, even those PHYs that are built in?

I've long thought that having NETWORK_PHY_TIMESTAMPING limited to
PHYLIB (and in practice device tree) systems is unfortunate.

Thanks,
Richard
Jacob Keller May 18, 2023, 11:23 p.m. UTC | #26
On 5/17/2023 3:46 PM, Richard Cochran wrote:
> On Wed, May 17, 2023 at 03:13:06PM -0700, Jacob Keller wrote:
> 
>> For example, ice hardware captures timestamp data in its internal PHY
>> well after the MAC layer finishes, but it doesn't expose the PHY to the
>> host at all..
>>
>> From a timing perspective it matters that its PHY, but from an
>> implementation perspective there's not much difference since we don't
>> support MAC timestamping at all (and the PHY isn't accessible through
>> phylink...)
> 
> Here is a crazy idea:  Wouldn't it be nice to have all PHYs represented
> in the kernel driver world, even those PHYs that are built in?
> 

I agree. I've wanted us to enable phylib/phylink/something for our
internal PHYs, but never got traction here to actually make it happen.
There was a push a few years ago for using it in igb, but ultimately
couldn't get enough support to make the development happen :( Similar
with using the i2c interfaces... These days, so much of the control
happens only in firmware that it has its own challenges.

> I've long thought that having NETWORK_PHY_TIMESTAMPING limited to
> PHYLIB (and in practice device tree) systems is unfortunate.
> 

It is a bit unfortunate. In the ice driver case, we just report the
timestamps in the usual way for a network driver, which is ok enough
since no other timestamps exist for us.

> Thanks,
> Richard
Andrew Lunn May 19, 2023, 12:50 p.m. UTC | #27
On Thu, May 18, 2023 at 04:23:10PM -0700, Jacob Keller wrote:
> 
> 
> On 5/17/2023 3:46 PM, Richard Cochran wrote:
> > On Wed, May 17, 2023 at 03:13:06PM -0700, Jacob Keller wrote:
> > 
> >> For example, ice hardware captures timestamp data in its internal PHY
> >> well after the MAC layer finishes, but it doesn't expose the PHY to the
> >> host at all..
> >>
> >> From a timing perspective it matters that its PHY, but from an
> >> implementation perspective there's not much difference since we don't
> >> support MAC timestamping at all (and the PHY isn't accessible through
> >> phylink...)
> > 
> > Here is a crazy idea:  Wouldn't it be nice to have all PHYs represented
> > in the kernel driver world, even those PHYs that are built in?
> > 
> 
> I agree. I've wanted us to enable phylib/phylink/something for our
> internal PHYs, but never got traction here to actually make it happen.
> There was a push a few years ago for using it in igb, but ultimately
> couldn't get enough support to make the development happen :( Similar
> with using the i2c interfaces... These days, so much of the control
> happens only in firmware that it has its own challenges.

I know there is some cleanup going on reducing replicated code in
Intel Ethernet drivers. I was wondering if that would extend to
PHYs. But as you say, recent Intel hardware have firmware controlling
the PHYs, not Linux. So such cleanups would be limited to older
controllers which i guess Intel probably no longer cares about.

> > I've long thought that having NETWORK_PHY_TIMESTAMPING limited to
> > PHYLIB (and in practice device tree) systems is unfortunate.
> > 
> 
> It is a bit unfortunate. In the ice driver case, we just report the
> timestamps in the usual way for a network driver, which is ok enough
> since no other timestamps exist for us.

I would actually say there is nothing fundamentally blocking using
NETWORK_PHY_TIMESTAMPING with something other than DT. It just needs
somebody to lead the way.

For ACPI in the scope of networking, everybody just seems to accept DT
won, and stuffs DT properties into ACPI tables. For PCI devices, there
has been some good work being done by Trustnetic using software nodes,
for gluing together GPIO controllers, I2C controller, SFP and
PHYLINK. It should be possible to do the same with PHY timestampers.

	 Andrew
Vladimir Oltean May 19, 2023, 1:28 p.m. UTC | #28
On Fri, May 12, 2023 at 10:38:52AM -0700, Jakub Kicinski wrote:
> On Fri, 12 May 2023 13:29:11 +0300 Vladimir Oltean wrote:
> > On Thu, May 11, 2023 at 04:16:25PM -0700, Jakub Kicinski wrote:
> > > > I mean, I can't ignore the fact that there are NICs that can provide
> > > > 2-step TX timestamps at line rate for all packets (not just PTP) for
> > > > line rates exceeding 10G, in band with the descriptor in its TX
> > > > completion queue. I don't want to give any names, just to point out
> > > > that there isn't any inherent limitation in the technology.  
> > > 
> > > Oh, you should tell me, maybe off-list then. 'Cause I don't know any.  
> > 
> > I hope the examples given in private will make you reconsider the
> > validity of my argument about DMA timestamps.
> 
> I may have lost track of what the argument is. There are devices
> which will provide a DMA stamp for Tx and Rx. We need an API that'll
> inform the user about it. 
> 
> To be clear I'm talking about drivers which are already in the tree,
> not opening the door for some shoddy new HW in.

So this is circling back to my original question (with emphasis on the
last part):

| Which drivers provide DMA timestamps, and how do they currently signal
| that they do this? Do they do this for all packets that get timestamped,
| or for "some"?

https://lore.kernel.org/netdev/20230511203646.ihljeknxni77uu5j@skbuf/

If only "some" packets (unpredictable which) get DMA timestamps when
a MAC timestamp isn't available, what UAPI can satisfactorily describe
that? And who would want that?
Richard Cochran May 19, 2023, 1:50 p.m. UTC | #29
On Fri, May 19, 2023 at 02:50:42PM +0200, Andrew Lunn wrote:

> I would actually say there is nothing fundamentally blocking using
> NETWORK_PHY_TIMESTAMPING with something other than DT. It just needs
> somebody to lead the way.

+1
 
> For ACPI in the scope of networking, everybody just seems to accept DT
> won, and stuffs DT properties into ACPI tables.

Is that stuff mainline?

> For PCI devices, there
> has been some good work being done by Trustnetic using software nodes,
> for gluing together GPIO controllers, I2C controller, SFP and
> PHYLINK.

mainline also?

> It should be possible to do the same with PHY timestampers.

Sounds promising...

Thanks,
Richard
Andrew Lunn May 19, 2023, 3:18 p.m. UTC | #30
On Fri, May 19, 2023 at 06:50:19AM -0700, Richard Cochran wrote:
> On Fri, May 19, 2023 at 02:50:42PM +0200, Andrew Lunn wrote:
> 
> > I would actually say there is nothing fundamentally blocking using
> > NETWORK_PHY_TIMESTAMPING with something other than DT. It just needs
> > somebody to lead the way.
> 
> +1
>  
> > For ACPI in the scope of networking, everybody just seems to accept DT
> > won, and stuffs DT properties into ACPI tables.
> 
> Is that stuff mainline?

There is some support. But it is somewhat limited.

ACPI and networking is an odd area. ACPI has historically been
x86. And few x86 SoCs have integrated networking. Those that do seem
to me to be PCI devices internally glued onto the PCIe bus, and
firmware driving the hardware, not Linux.

Integrated networking is much more popular for other architectures
SoCs, ARM, MIPS, PowerPc. These are all DT. And in general, Linux is
controlling the hardware, which is why we have good standardised DT
bindings for MDIO busses, PHYs, SFPs, etc.

Then ARM pushed ACPI for server class ARM systems. Now server class
systems generally don't have integrated Ethernet. They have lots of
PCIe lanes, and it seems normal to put one or more NICs on PCIe. That
also gives the flexibility you can get a high performance DPU from a
network specialist, or just a plain boring 10G PCIe device. As a
result, ACPI not saying anything about networking is not really an
issue for server class machines.

The little interest i've seen for ACPI networking has come from
'hobbist' trying to use ACPI on ARM systems which are not intended to
be servers. Generally, there is a fully working DT description of the
hardware, and Linux is happy to control the hardware using that DT
description. Getting ACPI to work is mostly straight forward, due to
most building blocks being standard. xhci for USB, ata for block
devices, etc.  But they then run into the complete lack of
standardisation for networking, and nothing at all about networking in
the ACPI standard. And these people tend not to be ACPI gurus who
could extend ACPI to cover the complexity of networking hardware. So
they just stuff the existing DT properties into ACPI tables and call
it done. And i have to push back on this, because they try to stuff
everything in, including properties we have deprecated because DT has
a long history and we got things wrong along the way.
 
> > For PCI devices, there
> > has been some good work being done by Trustnetic using software nodes,
> > for gluing together GPIO controllers, I2C controller, SFP and
> > PHYLINK.
> 
> mainline also?

On the way. Trustnetic got thrown in the deep end. They are new to
mainline. They brought a typical "vendor crap driver" and tried to get
it mainlined. It reinvented everything rather then reuse what already
exists in Linux. So they are effectively writing a new driver under
our guidance. It is an unusual device, because it is a PCIe device,
but without firmware. Linux controls everything. So they have the
double trouble of being mainline newbies, plus having to do things
with mainline that nobody else has done before in order to support
their hardware. So it is moving slow. But they are sticking at it, so
i think in the end they will get it working, and it could become a
reference for others to follow.

      Andrew
Jakub Kicinski May 19, 2023, 8:22 p.m. UTC | #31
On Fri, 19 May 2023 16:28:02 +0300 Vladimir Oltean wrote:
> > I may have lost track of what the argument is. There are devices
> > which will provide a DMA stamp for Tx and Rx. We need an API that'll
> > inform the user about it. 
> > 
> > To be clear I'm talking about drivers which are already in the tree,
> > not opening the door for some shoddy new HW in.  
> 
> So this is circling back to my original question (with emphasis on the
> last part):
> 
> | Which drivers provide DMA timestamps, and how do they currently signal
> | that they do this? Do they do this for all packets that get timestamped,
> | or for "some"?
> 
> https://lore.kernel.org/netdev/20230511203646.ihljeknxni77uu5j@skbuf/
> 
> If only "some" packets (unpredictable which) get DMA timestamps when
> a MAC timestamp isn't available, what UAPI can satisfactorily describe
> that? And who would want that?

The short answer is "I don't know". There's been a lot of talk about
time stamps due to Swift/BBRv2, but I don't have first hand experience
with it. That'd require time stamping "all" packets but the precision 
is really only required to be in usec.

Maybe Muhammad has a clearer use case in mind.
Zulkifli, Muhammad Husaini May 22, 2023, 3:56 a.m. UTC | #32
Hi All,

> On Fri, 19 May 2023 16:28:02 +0300 Vladimir Oltean wrote:
> > > I may have lost track of what the argument is. There are devices
> > > which will provide a DMA stamp for Tx and Rx. We need an API that'll
> > > inform the user about it.
> > >
> > > To be clear I'm talking about drivers which are already in the tree,
> > > not opening the door for some shoddy new HW in.
> >
> > So this is circling back to my original question (with emphasis on the
> > last part):
> >
> > | Which drivers provide DMA timestamps, and how do they currently
> > | signal that they do this? Do they do this for all packets that get
> > | timestamped, or for "some"?
> >
> > https://lore.kernel.org/netdev/20230511203646.ihljeknxni77uu5j@skbuf/
> >
> > If only "some" packets (unpredictable which) get DMA timestamps when a
> > MAC timestamp isn't available, what UAPI can satisfactorily describe
> > that? And who would want that?
> 
> The short answer is "I don't know". There's been a lot of talk about time
> stamps due to Swift/BBRv2, but I don't have first hand experience with it.
> That'd require time stamping "all" packets but the precision is really only
> required to be in usec.
> 
> Maybe Muhammad has a clearer use case in mind.

A controller may only support one HW Timestamp (PHY/Port) and one MAC Timestamp 
(DMA Timestamp) for packet timestamp activity. If a PTP packet has used the HW Timestamp (PHY/Port), 
the MAC timestamp can be used for another packet timestamp activity (ex. AF_XDP/AF_PACKET).
If all packets require and use the same HW Timestamp (PHY/Port timestamp) and we send a huge 
amount of traffic for AF_XDP/AF_PACKET, we may discover that some packets are missing the 
timestamp since every packet is attempting to read the same HW Port/PHY Timestamp register. 
You may see the tx_timestamp_timeout from ptp4l also in this scenario. 
Giving the user the choice of selecting MAC or PHY timestamp through the socket options can help 
resolve the above issue if they enable per-packet MAC(DMA) timestamping.

Thanks, 
Husaini
Richard Cochran May 22, 2023, 8:04 p.m. UTC | #33
On Mon, May 22, 2023 at 03:56:36AM +0000, Zulkifli, Muhammad Husaini wrote:

> A controller may only support one HW Timestamp (PHY/Port) and one MAC Timestamp 
> (DMA Timestamp) for packet timestamp activity. If a PTP packet has used the HW Timestamp (PHY/Port), 

This is wrong.

The time stamping setting is global, at the device level, not at the
socket.  And that is not going to change.  This series is about
selecting between MAC/PHY time stamping globally, at the device level.

Thanks,
Richard
Jakub Kicinski May 22, 2023, 8:21 p.m. UTC | #34
On Mon, 22 May 2023 13:04:33 -0700 Richard Cochran wrote:
> On Mon, May 22, 2023 at 03:56:36AM +0000, Zulkifli, Muhammad Husaini wrote:
> 
> > A controller may only support one HW Timestamp (PHY/Port) and one MAC Timestamp 
> > (DMA Timestamp) for packet timestamp activity. If a PTP packet has used the HW Timestamp (PHY/Port),   
> 
> This is wrong.
>
> The time stamping setting is global, at the device level, not at the
> socket.  And that is not going to change.  This series is about
> selecting between MAC/PHY time stamping globally, at the device level.

What constitutes a device?

I'd present the facts differently. This series selects which _device_
(MAC or PHY) is responsible for delivering timestamps for a given
netdev.

HW which supports different timestamping points with different
capabilities is commonplace, so an API in this vicinity should be
extended to support the configuration. Today it's configured via device
private flags, or some out-of-tree tooling, which helps nobody :|
Richard Cochran May 23, 2023, 3:54 a.m. UTC | #35
On Mon, May 22, 2023 at 01:21:44PM -0700, Jakub Kicinski wrote:
> On Mon, 22 May 2023 13:04:33 -0700 Richard Cochran wrote:
> > On Mon, May 22, 2023 at 03:56:36AM +0000, Zulkifli, Muhammad Husaini wrote:
> > 
> > > A controller may only support one HW Timestamp (PHY/Port) and one MAC Timestamp 
> > > (DMA Timestamp) for packet timestamp activity. If a PTP packet has used the HW Timestamp (PHY/Port),   
> > 
> > This is wrong.
> >
> > The time stamping setting is global, at the device level, not at the
> > socket.  And that is not going to change.  This series is about
> > selecting between MAC/PHY time stamping globally, at the device level.
> 
> What constitutes a device?

Sorry, maybe I should have said "interface".

You cannot have one socket with MAC and another with DMA time stamps,
as the above post implies or wishes for.

Thanks,
Richard
Kory Maincent Sept. 4, 2023, 3:22 p.m. UTC | #36
Hello I am resuming my work on selectable timestamping layers.

On Wed, 17 May 2023 13:07:06 -0700
Jakub Kicinski <kuba@kernel.org> wrote:

> On Wed, 17 May 2023 21:46:43 +0200 Andrew Lunn wrote:
> > As i said in an earlier thread, with a bit of a stretch, there could
> > be 7 places to take time stamps in the system. We need some sort of
> > identifier to indicate which of these stampers to use.
> > 
> > Is clock ID unique? In a switch, i think there could be multiple
> > stampers, one per MAC port, sharing one clock? So you actually need
> > more than a clock ID.  
> 
> Clock ID is a bit vague too, granted, but in practice clock ID should
> correspond to the driver fairly well? My thinking was - use clock ID
> to select the (silicon) device, use a different attribute to select 
> the stamping point.
> 
> IOW try to use the existing attribute before inventing a new one.

What do you think of using the clock ID to select the timestamp layer, and add
a ts_layer field in ts_info that will describe the timestamp layer. This allow
to have more information than the vague clock ID. We set it in the driver.
With it, we could easily add new layers different than simple mac and phy.
I am currently working on this implementation.
Richard Cochran Sept. 4, 2023, 5:47 p.m. UTC | #37
On Mon, Sep 04, 2023 at 05:22:45PM +0200, Köry Maincent wrote:
> Hello I am resuming my work on selectable timestamping layers.
> 
> On Wed, 17 May 2023 13:07:06 -0700
> Jakub Kicinski <kuba@kernel.org> wrote:
> 
> > On Wed, 17 May 2023 21:46:43 +0200 Andrew Lunn wrote:
> > > As i said in an earlier thread, with a bit of a stretch, there could
> > > be 7 places to take time stamps in the system. We need some sort of
> > > identifier to indicate which of these stampers to use.
> > > 
> > > Is clock ID unique? In a switch, i think there could be multiple
> > > stampers, one per MAC port, sharing one clock? So you actually need
> > > more than a clock ID.  

Let's not mix things up.  A clock ID identifies a dynamic posix clock,
just like the name suggests.  A clock is a device that supports
operations like gettime and settime.

A given clock might or might not generate time stamps.

The time stamp API is completely separate.

> > Clock ID is a bit vague too, granted, but in practice clock ID should
> > correspond to the driver fairly well? My thinking was - use clock ID
> > to select the (silicon) device, use a different attribute to select 
> > the stamping point.

I've never heard of a device that has different time stamping points.
If one ever appeared, then it ought to present two interfaces.

> > IOW try to use the existing attribute before inventing a new one.
> 
> What do you think of using the clock ID to select the timestamp layer, and add
> a ts_layer field in ts_info that will describe the timestamp layer. This allow
> to have more information than the vague clock ID. We set it in the driver.
> With it, we could easily add new layers different than simple mac and phy.
> I am currently working on this implementation.

I think you should model the layers explicitly.

Thanks,
Richard
Jakub Kicinski Sept. 5, 2023, 6:47 p.m. UTC | #38
On Mon, 4 Sep 2023 10:47:00 -0700 Richard Cochran wrote:
> > > Clock ID is a bit vague too, granted, but in practice clock ID should
> > > correspond to the driver fairly well? My thinking was - use clock ID
> > > to select the (silicon) device, use a different attribute to select 
> > > the stamping point.  
> 
> I've never heard of a device that has different time stamping points.
> If one ever appeared, then it ought to present two interfaces.
> 
> > > IOW try to use the existing attribute before inventing a new one.  
> > 
> > What do you think of using the clock ID to select the timestamp layer, and add
> > a ts_layer field in ts_info that will describe the timestamp layer. This allow
> > to have more information than the vague clock ID. We set it in the driver.
> > With it, we could easily add new layers different than simple mac and phy.
> > I am currently working on this implementation.  
> 
> I think you should model the layers explicitly.

Maybe we should try to enumerate the use cases, I don't remember now
but I think the concern was that there may be multiple PHYs?

(Using [] to denote a single device)

#0    integrated NIC: [DMA MAC PHY]
#1       "Linux" NIC: [DMA MAC][PHY]
#2   DSA switch trap: [DMA MAC][MAC PHY]
#3 DSA switch switch: [PHY MAC  MAC PHY]
#4   DSA distributed: [PHY MAC][MAC PHY]

All these should be fine with netdev + layer, IIUC.
Andrew Lunn Sept. 5, 2023, 8:29 p.m. UTC | #39
> Maybe we should try to enumerate the use cases, I don't remember now
> but I think the concern was that there may be multiple PHYs?

You often see a Marvell 10G PHY between a MAC and an SFP cage. You can
then get a copper SFP module which has a PHY in it.

So:

"Linux" NIC: [DMA MAC][PHY][PHY] 

And just to make it more interesting, you sometimes see:

[MAC] - MII MUX -+---[PHY][PHY]
                 |
                 +---[PHY]

This is currently not supported, but there is work in progress to
address this, by giving each PHY and ID, and extending the netlink
ethtool so you can enumerate PHYs and individually configure them.

And i pointed out maybe the worst case scenario:

[MAC][PHY][PHY][MAC]switch core[MAC][PHY][PHY]

	Andrew
Kory Maincent Sept. 6, 2023, 9:22 a.m. UTC | #40
On Tue, 5 Sep 2023 22:29:51 +0200
Andrew Lunn <andrew@lunn.ch> wrote:

> > Maybe we should try to enumerate the use cases, I don't remember now
> > but I think the concern was that there may be multiple PHYs?  
> 
> You often see a Marvell 10G PHY between a MAC and an SFP cage. You can
> then get a copper SFP module which has a PHY in it.
> 
> So:
> 
> "Linux" NIC: [DMA MAC][PHY][PHY] 
> 
> And just to make it more interesting, you sometimes see:
> 
> [MAC] - MII MUX -+---[PHY][PHY]
>                  |
>                  +---[PHY]
> 
> This is currently not supported, but there is work in progress to
> address this, by giving each PHY and ID, and extending the netlink
> ethtool so you can enumerate PHYs and individually configure them.

Yes, I have talked to maxime about his PHY ID support patch series.

> And i pointed out maybe the worst case scenario:
> 
> [MAC][PHY][PHY][MAC]switch core[MAC][PHY][PHY]

always more complex! :)
Russell King (Oracle) Sept. 7, 2023, 9:29 a.m. UTC | #41
On Tue, Sep 05, 2023 at 10:29:51PM +0200, Andrew Lunn wrote:
> > Maybe we should try to enumerate the use cases, I don't remember now
> > but I think the concern was that there may be multiple PHYs?
> 
> You often see a Marvell 10G PHY between a MAC and an SFP cage. You can
> then get a copper SFP module which has a PHY in it.
> 
> So:
> 
> "Linux" NIC: [DMA MAC][PHY][PHY] 

Let's be clear - one of the reasons that this whole topic was triggered
was because of mvpp2 plus Marvell 1G PHYs. mvpp2 has a good PTP
implementation, where as Marvell 1G PHYs are not quite as good. With
the code as it stood, merely adding PTP support to Marvell PHYs would
result in setups that use a Marvell 1G PHY with mvpp2 to break - some
PTP API calls would end up going to one PTP implementation while other
PTP API calls end up going to the other.

Once this gets solved properly, then I can think about sending the
patches that add support for PTP in Marvell 1G PHYs, and then we will
have the situation where we have a MAC and a PHY on the *same* network
interface that are PTP capable.

People have been asking me about the Marvell PHY PTP patches - and I
have had to tell them that they can't be merged because of the PTP
API crazyness.

So... it would be entirely possible, if PTP were to be implemented for
the Marvell 10G PHYs, that there would be _three_ points with a SFP
module to do PTP, although it probably does not make much sense to
attempt to do so on the SFP PHY. In any case, before we get to that
point, we first need to work out how to support multiple ethernet PHYs
on one MAC.

Even with that solved, the situation you describe above is likely to be
problematical. PHYs that connect to SFPs generally only support fibre
interface modes and not SGMII on that port which limits the usefulness
of copper SFPs - they won't be able to do 10M / 100M unless they're one
of those PHYs that does rate adaption which seem fairly rare at the
moment.
diff mbox series

Patch

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index cd0973d4ba01..539425fdaf7c 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -210,6 +210,8 @@  Userspace to kernel:
   ``ETHTOOL_MSG_EEE_GET``               get EEE settings
   ``ETHTOOL_MSG_EEE_SET``               set EEE settings
   ``ETHTOOL_MSG_TSINFO_GET``		get timestamping info
+  ``ETHTOOL_MSG_TS_GET``                get current hardware timestamping
+  ``ETHTOOL_MSG_TSLIST_GET``            list available hardware timestamping
   ``ETHTOOL_MSG_CABLE_TEST_ACT``        action start cable test
   ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``    action start raw TDR cable test
   ``ETHTOOL_MSG_TUNNEL_INFO_GET``       get tunnel offload info
@@ -1990,6 +1992,42 @@  The attributes are propagated to the driver through the following structure:
 .. kernel-doc:: include/linux/ethtool.h
     :identifiers: ethtool_mm_cfg
 
+TS_GET
+======
+
+Gets transceiver module parameters.
+
+Request contents:
+
+  =================================  ======  ==========================
+  ``ETHTOOL_A_TS_HEADER``            nested  request header
+  =================================  ======  ==========================
+
+Kernel response contents:
+
+  =======================  ======  ====================================
+  ``ETHTOOL_A_TS_HEADER``  nested  reply header
+  ``ETHTOOL_A_TS_LAYER``   u32     current hardware timestamping
+  =======================  ======  ====================================
+
+TSLIST_GET
+==========
+
+Gets transceiver module parameters.
+
+Request contents:
+
+  =================================  ======  ==========================
+  ``ETHTOOL_A_TS_HEADER``            nested  request header
+  =================================  ======  ==========================
+
+Kernel response contents:
+
+  =======================  ======  ===================================
+  ``ETHTOOL_A_TS_HEADER``  nested  reply header
+  ``ETHTOOL_A_TS_LAYER``   u32     available hardware timestamping
+  =======================  ======  ===================================
+
 Request translation
 ===================
 
@@ -2096,4 +2134,6 @@  are netlink only.
   n/a                                 ``ETHTOOL_MSG_PLCA_GET_STATUS``
   n/a                                 ``ETHTOOL_MSG_MM_GET``
   n/a                                 ``ETHTOOL_MSG_MM_SET``
+  n/a                                 ``ETHTOOL_MSG_TS_GET``
+  n/a                                 ``ETHTOOL_MSG_TSLIST_GET``
   =================================== =====================================
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 1ebf8d455f07..447908393b91 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -39,6 +39,8 @@  enum {
 	ETHTOOL_MSG_EEE_GET,
 	ETHTOOL_MSG_EEE_SET,
 	ETHTOOL_MSG_TSINFO_GET,
+	ETHTOOL_MSG_TSLIST_GET,
+	ETHTOOL_MSG_TS_GET,
 	ETHTOOL_MSG_CABLE_TEST_ACT,
 	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
@@ -92,6 +94,8 @@  enum {
 	ETHTOOL_MSG_EEE_GET_REPLY,
 	ETHTOOL_MSG_EEE_NTF,
 	ETHTOOL_MSG_TSINFO_GET_REPLY,
+	ETHTOOL_MSG_TSLIST_GET_REPLY,
+	ETHTOOL_MSG_TS_GET_REPLY,
 	ETHTOOL_MSG_CABLE_TEST_NTF,
 	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
@@ -484,6 +488,17 @@  enum {
 	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
 };
 
+/* TS LAYER */
+
+enum {
+	ETHTOOL_A_TS_UNSPEC,
+	ETHTOOL_A_TS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_TS_LAYER,			/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TS_CNT,
+	ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1)
+};
 /* PHC VCLOCKS */
 
 enum {
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index a2c66b3d7f0f..d7c1798d45fe 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -13,6 +13,14 @@ 
 #include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
+/* Hardware layer of the SO_TIMESTAMPING provider */
+enum timestamping_layer {
+	SOF_MAC_TIMESTAMPING = (1<<0),
+	SOF_PHY_TIMESTAMPING = (1<<1),
+
+	SOF_LAYER_TIMESTAMPING_LAST = SOF_PHY_TIMESTAMPING,
+};
+
 /* SO_TIMESTAMPING flags */
 enum {
 	SOF_TIMESTAMPING_TX_HARDWARE = (1<<0),
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 504f954a1b28..4ea64c080639 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,4 @@  ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
 		   tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
-		   module.o pse-pd.o plca.o mm.o
+		   module.o pse-pd.o plca.o mm.o ts.o
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 08120095cc68..8d9e27b13e28 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -293,6 +293,8 @@  ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_FEC_GET]		= &ethnl_fec_request_ops,
 	[ETHTOOL_MSG_FEC_SET]		= &ethnl_fec_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
+	[ETHTOOL_MSG_TS_GET]		= &ethnl_ts_request_ops,
+	[ETHTOOL_MSG_TSLIST_GET]	= &ethnl_tslist_request_ops,
 	[ETHTOOL_MSG_MODULE_EEPROM_GET]	= &ethnl_module_eeprom_request_ops,
 	[ETHTOOL_MSG_STATS_GET]		= &ethnl_stats_request_ops,
 	[ETHTOOL_MSG_PHC_VCLOCKS_GET]	= &ethnl_phc_vclocks_request_ops,
@@ -1011,6 +1013,24 @@  static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_tsinfo_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_TSLIST_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_ts_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1,
+	},
+	{
+		.cmd	= ETHTOOL_MSG_TS_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_ts_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1,
+	},
 	{
 		.cmd	= ETHTOOL_MSG_CABLE_TEST_ACT,
 		.flags	= GENL_UNS_ADMIN_PERM,
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 79424b34b553..49c700777a32 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -385,6 +385,8 @@  extern const struct ethnl_request_ops ethnl_coalesce_request_ops;
 extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_ts_request_ops;
+extern const struct ethnl_request_ops ethnl_tslist_request_ops;
 extern const struct ethnl_request_ops ethnl_fec_request_ops;
 extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
 extern const struct ethnl_request_ops ethnl_stats_request_ops;
@@ -423,6 +425,7 @@  extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
 extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
 extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1];
 extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + 1];
+extern const struct nla_policy ethnl_ts_get_policy[ETHTOOL_A_TS_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
diff --git a/net/ethtool/ts.c b/net/ethtool/ts.c
new file mode 100644
index 000000000000..a71c47ff0c6b
--- /dev/null
+++ b/net/ethtool/ts.c
@@ -0,0 +1,114 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct ts_req_info {
+	struct ethnl_req_info		base;
+};
+
+struct ts_reply_data {
+	struct ethnl_reply_data		base;
+	u32				ts;
+};
+
+#define TS_REPDATA(__reply_base) \
+	container_of(__reply_base, struct ts_reply_data, base)
+
+/* TS_GET */
+const struct nla_policy ethnl_ts_get_policy[] = {
+	[ETHTOOL_A_TS_HEADER]		=
+		NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int ts_prepare_data(const struct ethnl_req_info *req_base,
+			       struct ethnl_reply_data *reply_base,
+			       struct genl_info *info)
+{
+	struct ts_reply_data *data = TS_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	if (phy_has_tsinfo(dev->phydev))
+		data->ts = SOF_PHY_TIMESTAMPING;
+	else if (ops->get_ts_info)
+		data->ts = SOF_MAC_TIMESTAMPING;
+	else
+		return -EOPNOTSUPP;
+
+	ethnl_ops_complete(dev);
+
+	return ret;
+}
+
+static int ts_reply_size(const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	return nla_total_size(sizeof(u32));
+}
+
+static int ts_fill_reply(struct sk_buff *skb,
+			     const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	struct ts_reply_data *data = TS_REPDATA(reply_base);
+	return nla_put_u32(skb, ETHTOOL_A_TS_LAYER, data->ts);
+}
+
+const struct ethnl_request_ops ethnl_ts_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_TS_GET,
+	.reply_cmd		= ETHTOOL_MSG_TS_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_TS_HEADER,
+	.req_info_size		= sizeof(struct ts_req_info),
+	.reply_data_size	= sizeof(struct ts_reply_data),
+
+	.prepare_data		= ts_prepare_data,
+	.reply_size		= ts_reply_size,
+	.fill_reply		= ts_fill_reply,
+};
+
+/* TSLIST_GET */
+static int tslist_prepare_data(const struct ethnl_req_info *req_base,
+			       struct ethnl_reply_data *reply_base,
+			       struct genl_info *info)
+{
+	struct ts_reply_data *data = TS_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	data->ts = 0;
+	if (phy_has_tsinfo(dev->phydev))
+		data->ts = SOF_PHY_TIMESTAMPING;
+	if (ops->get_ts_info)
+		data->ts |= SOF_MAC_TIMESTAMPING;
+
+	ethnl_ops_complete(dev);
+
+	return ret;
+}
+
+const struct ethnl_request_ops ethnl_tslist_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_TSLIST_GET,
+	.reply_cmd		= ETHTOOL_MSG_TSLIST_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_TS_HEADER,
+	.req_info_size		= sizeof(struct ts_req_info),
+	.reply_data_size	= sizeof(struct ts_reply_data),
+
+	.prepare_data		= tslist_prepare_data,
+	.reply_size		= ts_reply_size,
+	.fill_reply		= ts_fill_reply,
+};