diff mbox series

[bpf-next,v2,06/20] ice: Support HW timestamp hint

Message ID 20230703181226.19380-7-larysa.zaremba@intel.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series XDP metadata via kfuncs for ice | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 24 this patch: 9
netdev/cc_maintainers warning 8 maintainers not CCed: hawk@kernel.org jesse.brandeburg@intel.com intel-wired-lan@lists.osuosl.org davem@davemloft.net anthony.l.nguyen@intel.com pabeni@redhat.com richardcochran@gmail.com edumazet@google.com
netdev/build_clang fail Errors and warnings before: 35 this patch: 20
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 24 this patch: 9
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Larysa Zaremba July 3, 2023, 6:12 p.m. UTC
Use previously refactored code and create a function
that allows XDP code to read HW timestamp.

Also, move cached_phctime into packet context, this way this data still
stays in the ring structure, just at the different address.

HW timestamp is the first supported hint in the driver,
so also add xdp_metadata_ops.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  2 ++
 drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
 drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
 drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
 drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
 drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
 7 files changed, 31 insertions(+), 4 deletions(-)

Comments

Stanislav Fomichev July 5, 2023, 5:30 p.m. UTC | #1
On 07/03, Larysa Zaremba wrote:
> Use previously refactored code and create a function
> that allows XDP code to read HW timestamp.
> 
> Also, move cached_phctime into packet context, this way this data still
> stays in the ring structure, just at the different address.
> 
> HW timestamp is the first supported hint in the driver,
> so also add xdp_metadata_ops.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice.h          |  2 ++
>  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
>  drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
>  drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
>  drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
>  drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
>  drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
>  7 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> index 4ba3d99439a0..7a973a2229f1 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
>  	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
>  	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
>  }
> +
> +extern const struct xdp_metadata_ops ice_xdp_md_ops;
>  #endif /* _ICE_H_ */
> diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
>  		/* clone ring and setup updated count */
>  		rx_rings[i] = *vsi->rx_rings[i];
>  		rx_rings[i].count = new_rx_cnt;
> -		rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
> +		rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
>  		rx_rings[i].desc = NULL;
>  		rx_rings[i].rx_buf = NULL;
>  		/* this is to allow wr32 to have something to write to
> diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> index 00e3afd507a4..eb69b0ac7956 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
>  		ring->netdev = vsi->netdev;
>  		ring->dev = dev;
>  		ring->count = vsi->num_rx_desc;
> -		ring->cached_phctime = pf->ptp.cached_phc_time;
> +		ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
>  		WRITE_ONCE(vsi->rx_rings[i], ring);
>  	}
>  
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index 93979ab18bc1..f21996b812ea 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
>  
>  	netdev->netdev_ops = &ice_netdev_ops;
>  	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
> +	netdev->xdp_metadata_ops = &ice_xdp_md_ops;
>  	ice_set_ethtool_ops(netdev);
>  
>  	if (vsi->type != ICE_VSI_PF)
> diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> index a31333972c68..70697e4829dd 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
>  		ice_for_each_rxq(vsi, j) {
>  			if (!vsi->rx_rings[j])
>  				continue;
> -			WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
> +			WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
>  		}
>  	}
>  	clear_bit(ICE_CFG_BUSY, pf->state);
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index d0ab2c4c0c91..4237702a58a9 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -259,6 +259,7 @@ enum ice_rx_dtype {
>  
>  struct ice_pkt_ctx {
>  	const union ice_32b_rx_flex_desc *eop_desc;
> +	u64 cached_phctime;
>  };
>  
>  struct ice_xdp_buff {
> @@ -354,7 +355,6 @@ struct ice_rx_ring {
>  	struct ice_tx_ring *xdp_ring;
>  	struct xsk_buff_pool *xsk_pool;
>  	dma_addr_t dma;			/* physical address of ring */
> -	u64 cached_phctime;
>  	u16 rx_buf_len;
>  	u8 dcb_tc;			/* Traffic class of ring */
>  	u8 ptp_rx;
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> index beb1c5bb392a..463d9e5cbe05 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> @@ -546,3 +546,27 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
>  			spin_unlock(&xdp_ring->tx_lock);
>  	}
>  }
> +
> +/**
> + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
> + * @ctx: XDP buff pointer
> + * @ts_ns: destination address
> + *
> + * Copy HW timestamp (if available) to the destination address.
> + */
> +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
> +{
> +	const struct ice_xdp_buff *xdp_ext = (void *)ctx;
> +	u64 cached_time;
> +
> +	cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);

I believe we have to have something like the following here:

if (!ts_ns)
	return -EINVAL;

IOW, I don't think verifier guarantees that those pointer args are
non-NULL. Same for the other ice kfunc you're adding and veth changes.

Can you also fix it for the existing veth kfuncs? (or lmk if you prefer me
to fix it).
Larysa Zaremba July 6, 2023, 2:22 p.m. UTC | #2
On Wed, Jul 05, 2023 at 10:30:56AM -0700, Stanislav Fomichev wrote:
> On 07/03, Larysa Zaremba wrote:
> > Use previously refactored code and create a function
> > that allows XDP code to read HW timestamp.
> > 
> > Also, move cached_phctime into packet context, this way this data still
> > stays in the ring structure, just at the different address.
> > 
> > HW timestamp is the first supported hint in the driver,
> > so also add xdp_metadata_ops.
> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice.h          |  2 ++
> >  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
> >  drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
> >  drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
> >  drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
> >  drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
> >  drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
> >  7 files changed, 31 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > index 4ba3d99439a0..7a973a2229f1 100644
> > --- a/drivers/net/ethernet/intel/ice/ice.h
> > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
> >  	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> >  	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
> >  }
> > +
> > +extern const struct xdp_metadata_ops ice_xdp_md_ops;
> >  #endif /* _ICE_H_ */
> > diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
> >  		/* clone ring and setup updated count */
> >  		rx_rings[i] = *vsi->rx_rings[i];
> >  		rx_rings[i].count = new_rx_cnt;
> > -		rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
> > +		rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> >  		rx_rings[i].desc = NULL;
> >  		rx_rings[i].rx_buf = NULL;
> >  		/* this is to allow wr32 to have something to write to
> > diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> > index 00e3afd507a4..eb69b0ac7956 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> > @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
> >  		ring->netdev = vsi->netdev;
> >  		ring->dev = dev;
> >  		ring->count = vsi->num_rx_desc;
> > -		ring->cached_phctime = pf->ptp.cached_phc_time;
> > +		ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> >  		WRITE_ONCE(vsi->rx_rings[i], ring);
> >  	}
> >  
> > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > index 93979ab18bc1..f21996b812ea 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
> >  
> >  	netdev->netdev_ops = &ice_netdev_ops;
> >  	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
> > +	netdev->xdp_metadata_ops = &ice_xdp_md_ops;
> >  	ice_set_ethtool_ops(netdev);
> >  
> >  	if (vsi->type != ICE_VSI_PF)
> > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > index a31333972c68..70697e4829dd 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
> >  		ice_for_each_rxq(vsi, j) {
> >  			if (!vsi->rx_rings[j])
> >  				continue;
> > -			WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
> > +			WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
> >  		}
> >  	}
> >  	clear_bit(ICE_CFG_BUSY, pf->state);
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > index d0ab2c4c0c91..4237702a58a9 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > @@ -259,6 +259,7 @@ enum ice_rx_dtype {
> >  
> >  struct ice_pkt_ctx {
> >  	const union ice_32b_rx_flex_desc *eop_desc;
> > +	u64 cached_phctime;
> >  };
> >  
> >  struct ice_xdp_buff {
> > @@ -354,7 +355,6 @@ struct ice_rx_ring {
> >  	struct ice_tx_ring *xdp_ring;
> >  	struct xsk_buff_pool *xsk_pool;
> >  	dma_addr_t dma;			/* physical address of ring */
> > -	u64 cached_phctime;
> >  	u16 rx_buf_len;
> >  	u8 dcb_tc;			/* Traffic class of ring */
> >  	u8 ptp_rx;
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > index beb1c5bb392a..463d9e5cbe05 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > @@ -546,3 +546,27 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
> >  			spin_unlock(&xdp_ring->tx_lock);
> >  	}
> >  }
> > +
> > +/**
> > + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
> > + * @ctx: XDP buff pointer
> > + * @ts_ns: destination address
> > + *
> > + * Copy HW timestamp (if available) to the destination address.
> > + */
> > +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
> > +{
> > +	const struct ice_xdp_buff *xdp_ext = (void *)ctx;
> > +	u64 cached_time;
> > +
> > +	cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);
> 
> I believe we have to have something like the following here:
> 
> if (!ts_ns)
> 	return -EINVAL;
> 
> IOW, I don't think verifier guarantees that those pointer args are
> non-NULL.

Oh, that's a shame.

> Same for the other ice kfunc you're adding and veth changes.
> 
> Can you also fix it for the existing veth kfuncs? (or lmk if you prefer me
> to fix it).

I think I can send fixes for RX hash and timestamp in veth separately, before 
v3 of this patchset, code probably doesn't intersect.

But argument checks in kfuncs are a little bit a gray area for me, whether they 
should be sent to stable tree or not?
Stanislav Fomichev July 6, 2023, 4:39 p.m. UTC | #3
On Thu, Jul 6, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
>
> On Wed, Jul 05, 2023 at 10:30:56AM -0700, Stanislav Fomichev wrote:
> > On 07/03, Larysa Zaremba wrote:
> > > Use previously refactored code and create a function
> > > that allows XDP code to read HW timestamp.
> > >
> > > Also, move cached_phctime into packet context, this way this data still
> > > stays in the ring structure, just at the different address.
> > >
> > > HW timestamp is the first supported hint in the driver,
> > > so also add xdp_metadata_ops.
> > >
> > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > ---
> > >  drivers/net/ethernet/intel/ice/ice.h          |  2 ++
> > >  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
> > >  drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
> > >  drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
> > >  drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
> > >  drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
> > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
> > >  7 files changed, 31 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > > index 4ba3d99439a0..7a973a2229f1 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice.h
> > > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > > @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
> > >     set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> > >     clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
> > >  }
> > > +
> > > +extern const struct xdp_metadata_ops ice_xdp_md_ops;
> > >  #endif /* _ICE_H_ */
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
> > >             /* clone ring and setup updated count */
> > >             rx_rings[i] = *vsi->rx_rings[i];
> > >             rx_rings[i].count = new_rx_cnt;
> > > -           rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
> > > +           rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > >             rx_rings[i].desc = NULL;
> > >             rx_rings[i].rx_buf = NULL;
> > >             /* this is to allow wr32 to have something to write to
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > index 00e3afd507a4..eb69b0ac7956 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
> > >             ring->netdev = vsi->netdev;
> > >             ring->dev = dev;
> > >             ring->count = vsi->num_rx_desc;
> > > -           ring->cached_phctime = pf->ptp.cached_phc_time;
> > > +           ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > >             WRITE_ONCE(vsi->rx_rings[i], ring);
> > >     }
> > >
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > > index 93979ab18bc1..f21996b812ea 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > > @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
> > >
> > >     netdev->netdev_ops = &ice_netdev_ops;
> > >     netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
> > > +   netdev->xdp_metadata_ops = &ice_xdp_md_ops;
> > >     ice_set_ethtool_ops(netdev);
> > >
> > >     if (vsi->type != ICE_VSI_PF)
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > index a31333972c68..70697e4829dd 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
> > >             ice_for_each_rxq(vsi, j) {
> > >                     if (!vsi->rx_rings[j])
> > >                             continue;
> > > -                   WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
> > > +                   WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
> > >             }
> > >     }
> > >     clear_bit(ICE_CFG_BUSY, pf->state);
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > index d0ab2c4c0c91..4237702a58a9 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > @@ -259,6 +259,7 @@ enum ice_rx_dtype {
> > >
> > >  struct ice_pkt_ctx {
> > >     const union ice_32b_rx_flex_desc *eop_desc;
> > > +   u64 cached_phctime;
> > >  };
> > >
> > >  struct ice_xdp_buff {
> > > @@ -354,7 +355,6 @@ struct ice_rx_ring {
> > >     struct ice_tx_ring *xdp_ring;
> > >     struct xsk_buff_pool *xsk_pool;
> > >     dma_addr_t dma;                 /* physical address of ring */
> > > -   u64 cached_phctime;
> > >     u16 rx_buf_len;
> > >     u8 dcb_tc;                      /* Traffic class of ring */
> > >     u8 ptp_rx;
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > index beb1c5bb392a..463d9e5cbe05 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > @@ -546,3 +546,27 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
> > >                     spin_unlock(&xdp_ring->tx_lock);
> > >     }
> > >  }
> > > +
> > > +/**
> > > + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
> > > + * @ctx: XDP buff pointer
> > > + * @ts_ns: destination address
> > > + *
> > > + * Copy HW timestamp (if available) to the destination address.
> > > + */
> > > +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
> > > +{
> > > +   const struct ice_xdp_buff *xdp_ext = (void *)ctx;
> > > +   u64 cached_time;
> > > +
> > > +   cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);
> >
> > I believe we have to have something like the following here:
> >
> > if (!ts_ns)
> >       return -EINVAL;
> >
> > IOW, I don't think verifier guarantees that those pointer args are
> > non-NULL.
>
> Oh, that's a shame.
>
> > Same for the other ice kfunc you're adding and veth changes.
> >
> > Can you also fix it for the existing veth kfuncs? (or lmk if you prefer me
> > to fix it).
>
> I think I can send fixes for RX hash and timestamp in veth separately, before
> v3 of this patchset, code probably doesn't intersect.
>
> But argument checks in kfuncs are a little bit a gray area for me, whether they
> should be sent to stable tree or not?

Add a Fixes tag and they will get into the stable trees automatically I believe?
Larysa Zaremba July 10, 2023, 3:49 p.m. UTC | #4
On Thu, Jul 06, 2023 at 09:39:29AM -0700, Stanislav Fomichev wrote:
> On Thu, Jul 6, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
> >
> > On Wed, Jul 05, 2023 at 10:30:56AM -0700, Stanislav Fomichev wrote:
> > > On 07/03, Larysa Zaremba wrote:
> > > > Use previously refactored code and create a function
> > > > that allows XDP code to read HW timestamp.
> > > >
> > > > Also, move cached_phctime into packet context, this way this data still
> > > > stays in the ring structure, just at the different address.
> > > >
> > > > HW timestamp is the first supported hint in the driver,
> > > > so also add xdp_metadata_ops.
> > > >
> > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > ---
> > > >  drivers/net/ethernet/intel/ice/ice.h          |  2 ++
> > > >  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
> > > >  drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
> > > >  drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
> > > >  drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
> > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
> > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
> > > >  7 files changed, 31 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > > > index 4ba3d99439a0..7a973a2229f1 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice.h
> > > > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > > > @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
> > > >     set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> > > >     clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
> > > >  }
> > > > +
> > > > +extern const struct xdp_metadata_ops ice_xdp_md_ops;
> > > >  #endif /* _ICE_H_ */
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
> > > >             /* clone ring and setup updated count */
> > > >             rx_rings[i] = *vsi->rx_rings[i];
> > > >             rx_rings[i].count = new_rx_cnt;
> > > > -           rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
> > > > +           rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > > >             rx_rings[i].desc = NULL;
> > > >             rx_rings[i].rx_buf = NULL;
> > > >             /* this is to allow wr32 to have something to write to
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > index 00e3afd507a4..eb69b0ac7956 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
> > > >             ring->netdev = vsi->netdev;
> > > >             ring->dev = dev;
> > > >             ring->count = vsi->num_rx_desc;
> > > > -           ring->cached_phctime = pf->ptp.cached_phc_time;
> > > > +           ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > > >             WRITE_ONCE(vsi->rx_rings[i], ring);
> > > >     }
> > > >
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > > > index 93979ab18bc1..f21996b812ea 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > > > @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
> > > >
> > > >     netdev->netdev_ops = &ice_netdev_ops;
> > > >     netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
> > > > +   netdev->xdp_metadata_ops = &ice_xdp_md_ops;
> > > >     ice_set_ethtool_ops(netdev);
> > > >
> > > >     if (vsi->type != ICE_VSI_PF)
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > index a31333972c68..70697e4829dd 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
> > > >             ice_for_each_rxq(vsi, j) {
> > > >                     if (!vsi->rx_rings[j])
> > > >                             continue;
> > > > -                   WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
> > > > +                   WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
> > > >             }
> > > >     }
> > > >     clear_bit(ICE_CFG_BUSY, pf->state);
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > index d0ab2c4c0c91..4237702a58a9 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > @@ -259,6 +259,7 @@ enum ice_rx_dtype {
> > > >
> > > >  struct ice_pkt_ctx {
> > > >     const union ice_32b_rx_flex_desc *eop_desc;
> > > > +   u64 cached_phctime;
> > > >  };
> > > >
> > > >  struct ice_xdp_buff {
> > > > @@ -354,7 +355,6 @@ struct ice_rx_ring {
> > > >     struct ice_tx_ring *xdp_ring;
> > > >     struct xsk_buff_pool *xsk_pool;
> > > >     dma_addr_t dma;                 /* physical address of ring */
> > > > -   u64 cached_phctime;
> > > >     u16 rx_buf_len;
> > > >     u8 dcb_tc;                      /* Traffic class of ring */
> > > >     u8 ptp_rx;
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > index beb1c5bb392a..463d9e5cbe05 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > @@ -546,3 +546,27 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
> > > >                     spin_unlock(&xdp_ring->tx_lock);
> > > >     }
> > > >  }
> > > > +
> > > > +/**
> > > > + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
> > > > + * @ctx: XDP buff pointer
> > > > + * @ts_ns: destination address
> > > > + *
> > > > + * Copy HW timestamp (if available) to the destination address.
> > > > + */
> > > > +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
> > > > +{
> > > > +   const struct ice_xdp_buff *xdp_ext = (void *)ctx;
> > > > +   u64 cached_time;
> > > > +
> > > > +   cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);
> > >
> > > I believe we have to have something like the following here:
> > >
> > > if (!ts_ns)
> > >       return -EINVAL;
> > >
> > > IOW, I don't think verifier guarantees that those pointer args are
> > > non-NULL.
> >
> > Oh, that's a shame.
> >
> > > Same for the other ice kfunc you're adding and veth changes.
> > >
> > > Can you also fix it for the existing veth kfuncs? (or lmk if you prefer me
> > > to fix it).
> >
> > I think I can send fixes for RX hash and timestamp in veth separately, before
> > v3 of this patchset, code probably doesn't intersect.
> >
> > But argument checks in kfuncs are a little bit a gray area for me, whether they
> > should be sent to stable tree or not?
> 
> Add a Fixes tag and they will get into the stable trees automatically I believe?

What about declaring XDP hints kfuncs with

BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)

instead of BTF_ID_FLAGS(func, name, 0)
?

I have tested this just now and xdp_metadata passes just fine (so both stack 
and data_meta destination pointers work), but if I replace &timestamp with NULL,
verifier rejects the program with a descriptive message "Possibly NULL pointer 
passed to trusted arg1", so it serves our purpose. I do not see many ways this 
could limit the users, but it definitely benefits driver developers.

The only concern I see is that if we ever decide to allow NULL arguments for 
kfuncs, we'd need to add support for a "_or_null" suffix [0]. But it doesn't 
sound too hard?

I have dug into this, because adding

if (unlikely(!hash || &rss_type))
	return -EINVAL;

or something similar to every .xmo_ handler in existence starts to look ugly.

[0] 
https://lore.kernel.org/lkml/20230120054441.arj5h6yrnh5jsrgr@MacBook-Pro-6.local.dhcp.thefacebook.com/
Stanislav Fomichev July 10, 2023, 6:12 p.m. UTC | #5
On 07/10, Larysa Zaremba wrote:
> On Thu, Jul 06, 2023 at 09:39:29AM -0700, Stanislav Fomichev wrote:
> > On Thu, Jul 6, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
> > >
> > > On Wed, Jul 05, 2023 at 10:30:56AM -0700, Stanislav Fomichev wrote:
> > > > On 07/03, Larysa Zaremba wrote:
> > > > > Use previously refactored code and create a function
> > > > > that allows XDP code to read HW timestamp.
> > > > >
> > > > > Also, move cached_phctime into packet context, this way this data still
> > > > > stays in the ring structure, just at the different address.
> > > > >
> > > > > HW timestamp is the first supported hint in the driver,
> > > > > so also add xdp_metadata_ops.
> > > > >
> > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > ---
> > > > >  drivers/net/ethernet/intel/ice/ice.h          |  2 ++
> > > > >  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  2 +-
> > > > >  drivers/net/ethernet/intel/ice/ice_lib.c      |  2 +-
> > > > >  drivers/net/ethernet/intel/ice/ice_main.c     |  1 +
> > > > >  drivers/net/ethernet/intel/ice/ice_ptp.c      |  2 +-
> > > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     |  2 +-
> > > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 24 +++++++++++++++++++
> > > > >  7 files changed, 31 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > > > > index 4ba3d99439a0..7a973a2229f1 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice.h
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > > > > @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
> > > > >     set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> > > > >     clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
> > > > >  }
> > > > > +
> > > > > +extern const struct xdp_metadata_ops ice_xdp_md_ops;
> > > > >  #endif /* _ICE_H_ */
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > > index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> > > > > @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
> > > > >             /* clone ring and setup updated count */
> > > > >             rx_rings[i] = *vsi->rx_rings[i];
> > > > >             rx_rings[i].count = new_rx_cnt;
> > > > > -           rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
> > > > > +           rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > > > >             rx_rings[i].desc = NULL;
> > > > >             rx_rings[i].rx_buf = NULL;
> > > > >             /* this is to allow wr32 to have something to write to
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > > index 00e3afd507a4..eb69b0ac7956 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> > > > > @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
> > > > >             ring->netdev = vsi->netdev;
> > > > >             ring->dev = dev;
> > > > >             ring->count = vsi->num_rx_desc;
> > > > > -           ring->cached_phctime = pf->ptp.cached_phc_time;
> > > > > +           ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
> > > > >             WRITE_ONCE(vsi->rx_rings[i], ring);
> > > > >     }
> > > > >
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > > > > index 93979ab18bc1..f21996b812ea 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > > > > @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
> > > > >
> > > > >     netdev->netdev_ops = &ice_netdev_ops;
> > > > >     netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
> > > > > +   netdev->xdp_metadata_ops = &ice_xdp_md_ops;
> > > > >     ice_set_ethtool_ops(netdev);
> > > > >
> > > > >     if (vsi->type != ICE_VSI_PF)
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > > index a31333972c68..70697e4829dd 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > > > > @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
> > > > >             ice_for_each_rxq(vsi, j) {
> > > > >                     if (!vsi->rx_rings[j])
> > > > >                             continue;
> > > > > -                   WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
> > > > > +                   WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
> > > > >             }
> > > > >     }
> > > > >     clear_bit(ICE_CFG_BUSY, pf->state);
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > > index d0ab2c4c0c91..4237702a58a9 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > > > > @@ -259,6 +259,7 @@ enum ice_rx_dtype {
> > > > >
> > > > >  struct ice_pkt_ctx {
> > > > >     const union ice_32b_rx_flex_desc *eop_desc;
> > > > > +   u64 cached_phctime;
> > > > >  };
> > > > >
> > > > >  struct ice_xdp_buff {
> > > > > @@ -354,7 +355,6 @@ struct ice_rx_ring {
> > > > >     struct ice_tx_ring *xdp_ring;
> > > > >     struct xsk_buff_pool *xsk_pool;
> > > > >     dma_addr_t dma;                 /* physical address of ring */
> > > > > -   u64 cached_phctime;
> > > > >     u16 rx_buf_len;
> > > > >     u8 dcb_tc;                      /* Traffic class of ring */
> > > > >     u8 ptp_rx;
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > > index beb1c5bb392a..463d9e5cbe05 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
> > > > > @@ -546,3 +546,27 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
> > > > >                     spin_unlock(&xdp_ring->tx_lock);
> > > > >     }
> > > > >  }
> > > > > +
> > > > > +/**
> > > > > + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
> > > > > + * @ctx: XDP buff pointer
> > > > > + * @ts_ns: destination address
> > > > > + *
> > > > > + * Copy HW timestamp (if available) to the destination address.
> > > > > + */
> > > > > +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
> > > > > +{
> > > > > +   const struct ice_xdp_buff *xdp_ext = (void *)ctx;
> > > > > +   u64 cached_time;
> > > > > +
> > > > > +   cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);
> > > >
> > > > I believe we have to have something like the following here:
> > > >
> > > > if (!ts_ns)
> > > >       return -EINVAL;
> > > >
> > > > IOW, I don't think verifier guarantees that those pointer args are
> > > > non-NULL.
> > >
> > > Oh, that's a shame.
> > >
> > > > Same for the other ice kfunc you're adding and veth changes.
> > > >
> > > > Can you also fix it for the existing veth kfuncs? (or lmk if you prefer me
> > > > to fix it).
> > >
> > > I think I can send fixes for RX hash and timestamp in veth separately, before
> > > v3 of this patchset, code probably doesn't intersect.
> > >
> > > But argument checks in kfuncs are a little bit a gray area for me, whether they
> > > should be sent to stable tree or not?
> > 
> > Add a Fixes tag and they will get into the stable trees automatically I believe?
> 
> What about declaring XDP hints kfuncs with
> 
> BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
> 
> instead of BTF_ID_FLAGS(func, name, 0)
> ?
> 
> I have tested this just now and xdp_metadata passes just fine (so both stack 
> and data_meta destination pointers work), but if I replace &timestamp with NULL,
> verifier rejects the program with a descriptive message "Possibly NULL pointer 
> passed to trusted arg1", so it serves our purpose. I do not see many ways this 
> could limit the users, but it definitely benefits driver developers.
> 
> The only concern I see is that if we ever decide to allow NULL arguments for 
> kfuncs, we'd need to add support for a "_or_null" suffix [0]. But it doesn't 
> sound too hard?
> 
> I have dug into this, because adding
> 
> if (unlikely(!hash || &rss_type))
> 	return -EINVAL;
> 
> or something similar to every .xmo_ handler in existence starts to look ugly.
> 
> [0] 
> https://lore.kernel.org/lkml/20230120054441.arj5h6yrnh5jsrgr@MacBook-Pro-6.local.dhcp.thefacebook.com/

SG! Let's add KF_TRUSTED_ARGS. That is munch nicer indeed!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 4ba3d99439a0..7a973a2229f1 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -943,4 +943,6 @@  static inline void ice_clear_rdma_cap(struct ice_pf *pf)
 	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
 	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
 }
+
+extern const struct xdp_metadata_ops ice_xdp_md_ops;
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 8d5cbbd0b3d5..3c3b9cbfbcd3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2837,7 +2837,7 @@  ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
 		/* clone ring and setup updated count */
 		rx_rings[i] = *vsi->rx_rings[i];
 		rx_rings[i].count = new_rx_cnt;
-		rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
+		rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
 		rx_rings[i].desc = NULL;
 		rx_rings[i].rx_buf = NULL;
 		/* this is to allow wr32 to have something to write to
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 00e3afd507a4..eb69b0ac7956 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1445,7 +1445,7 @@  static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 		ring->netdev = vsi->netdev;
 		ring->dev = dev;
 		ring->count = vsi->num_rx_desc;
-		ring->cached_phctime = pf->ptp.cached_phc_time;
+		ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time;
 		WRITE_ONCE(vsi->rx_rings[i], ring);
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 93979ab18bc1..f21996b812ea 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3384,6 +3384,7 @@  static void ice_set_ops(struct ice_vsi *vsi)
 
 	netdev->netdev_ops = &ice_netdev_ops;
 	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+	netdev->xdp_metadata_ops = &ice_xdp_md_ops;
 	ice_set_ethtool_ops(netdev);
 
 	if (vsi->type != ICE_VSI_PF)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index a31333972c68..70697e4829dd 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1038,7 +1038,7 @@  static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
 		ice_for_each_rxq(vsi, j) {
 			if (!vsi->rx_rings[j])
 				continue;
-			WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+			WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime);
 		}
 	}
 	clear_bit(ICE_CFG_BUSY, pf->state);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index d0ab2c4c0c91..4237702a58a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -259,6 +259,7 @@  enum ice_rx_dtype {
 
 struct ice_pkt_ctx {
 	const union ice_32b_rx_flex_desc *eop_desc;
+	u64 cached_phctime;
 };
 
 struct ice_xdp_buff {
@@ -354,7 +355,6 @@  struct ice_rx_ring {
 	struct ice_tx_ring *xdp_ring;
 	struct xsk_buff_pool *xsk_pool;
 	dma_addr_t dma;			/* physical address of ring */
-	u64 cached_phctime;
 	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
 	u8 ptp_rx;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index beb1c5bb392a..463d9e5cbe05 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -546,3 +546,27 @@  void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
 			spin_unlock(&xdp_ring->tx_lock);
 	}
 }
+
+/**
+ * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
+ * @ctx: XDP buff pointer
+ * @ts_ns: destination address
+ *
+ * Copy HW timestamp (if available) to the destination address.
+ */
+static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
+{
+	const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+	u64 cached_time;
+
+	cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime);
+	*ts_ns = ice_ptp_get_rx_hwts(xdp_ext->pkt_ctx.eop_desc, cached_time);
+	if (!*ts_ns)
+		return -ENODATA;
+
+	return 0;
+}
+
+const struct xdp_metadata_ops ice_xdp_md_ops = {
+	.xmo_rx_timestamp		= ice_xdp_rx_hw_ts,
+};