diff mbox series

[bpf-next,v6,4/4] igc: Add launch time support to XDP ZC

Message ID 20250116155350.555374-5-yoong.siang.song@intel.com (mailing list archive)
State New
Headers show
Series xsk: TX metadata Launch Time support | expand

Commit Message

Song Yoong Siang Jan. 16, 2025, 3:53 p.m. UTC
Enable Launch Time Control (LTC) support to XDP zero copy via XDP Tx
metadata framework.

This patch is tested with tools/testing/selftests/bpf/xdp_hw_metadata on
Intel I225-LM Ethernet controller. Below are the test steps and result.

Test Steps:
1. At DUT, start xdp_hw_metadata selftest application:
   $ sudo ./xdp_hw_metadata enp2s0 -l 1000000000 -L 1

2. At Link Partner, send an UDP packet with VLAN priority 1 to port 9091 of
   DUT.

When launch time is set to 1s in the future, the delta between launch time
and transmit hardware timestamp is equal to 0.016us, as shown in result
below:
  0x562ff5dc8880: rx_desc[4]->addr=84110 addr=84110 comp_addr=84110 EoP
  rx_hash: 0xE343384 with RSS type:0x1
  HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to User RX-time sec:0.0002 (183.103 usec)
  XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User RX-time sec:0.0001 (80.309 usec)
  No rx_vlan_tci or rx_vlan_proto, err=-95
  0x562ff5dc8880: ping-pong with csum=561c (want c7dd) csum_start=34 csum_offset=6
  HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW Launch-time sec:1.0000 (1000000.000 usec)
  0x562ff5dc8880: complete tx idx=4 addr=4018
  HW Launch-time:   1734578016467548904 (sec:1734578016.4675) delta to HW TX-complete-time sec:0.0000 (0.016 usec)
  HW TX-complete-time:   1734578016467548920 (sec:1734578016.4675) delta to User TX-complete-time sec:0.0000 (32.546 usec)
  XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User TX-complete-time sec:0.9999 (999929.768 usec)
  HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW TX-complete-time sec:1.0000 (1000000.016 usec)
  0x562ff5dc8880: complete rx idx=132 addr=84110

Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 78 ++++++++++++++++-------
 1 file changed, 56 insertions(+), 22 deletions(-)

Comments

Abdul Rahim, Faizal Jan. 20, 2025, 6:25 a.m. UTC | #1
Hi Siang.

On 16/1/2025 11:53 pm, Song Yoong Siang wrote:
> Enable Launch Time Control (LTC) support to XDP zero copy via XDP Tx
> metadata framework.
> 
> This patch is tested with tools/testing/selftests/bpf/xdp_hw_metadata on
> Intel I225-LM Ethernet controller. Below are the test steps and result.
> 
> Test Steps:
> 1. At DUT, start xdp_hw_metadata selftest application:
>     $ sudo ./xdp_hw_metadata enp2s0 -l 1000000000 -L 1
> 
> 2. At Link Partner, send an UDP packet with VLAN priority 1 to port 9091 of
>     DUT.
> 
> When launch time is set to 1s in the future, the delta between launch time
> and transmit hardware timestamp is equal to 0.016us, as shown in result
> below:
>    0x562ff5dc8880: rx_desc[4]->addr=84110 addr=84110 comp_addr=84110 EoP
>    rx_hash: 0xE343384 with RSS type:0x1
>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to User RX-time sec:0.0002 (183.103 usec)
>    XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User RX-time sec:0.0001 (80.309 usec)
>    No rx_vlan_tci or rx_vlan_proto, err=-95
>    0x562ff5dc8880: ping-pong with csum=561c (want c7dd) csum_start=34 csum_offset=6
>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW Launch-time sec:1.0000 (1000000.000 usec)
>    0x562ff5dc8880: complete tx idx=4 addr=4018
>    HW Launch-time:   1734578016467548904 (sec:1734578016.4675) delta to HW TX-complete-time sec:0.0000 (0.016 usec)
>    HW TX-complete-time:   1734578016467548920 (sec:1734578016.4675) delta to User TX-complete-time sec:0.0000 (32.546 usec)
>    XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User TX-complete-time sec:0.9999 (999929.768 usec)
>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW TX-complete-time sec:1.0000 (1000000.016 usec)
>    0x562ff5dc8880: complete rx idx=132 addr=84110

To be cautious, could we perform a stress test by sending a higher number 
of packets with launch time? For example, we could send 200 packets, each 
configured with a launch time, and verify that the driver continues to 
function correctly afterward.

> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
> ---
>   drivers/net/ethernet/intel/igc/igc_main.c | 78 ++++++++++++++++-------
>   1 file changed, 56 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index 27872bdea9bd..6857f5f5b4b2 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -1566,6 +1566,26 @@ static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *s
>   	return false;
>   }
>   
> +static void igc_insert_empty_packet(struct igc_ring *tx_ring)
> +{
> +	struct igc_tx_buffer *empty_info;
> +	struct sk_buff *empty;
> +	void *data;
> +
> +	empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
> +	empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
> +	if (!empty)
> +		return;
> +
> +	data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
> +	memset(data, 0, IGC_EMPTY_FRAME_SIZE);
> +
> +	igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
> +
> +	if (igc_init_tx_empty_descriptor(tx_ring, empty, empty_info) < 0)
> +		dev_kfree_skb_any(empty);
> +}
> +

The function igc_insert_empty_packet() appears to wrap existing code to 
enhance reusability, with no new changes related to enabling launch-time 
XDP ZC functionality. If so, could we split this into a separate commit? 
This would make it clearer for the reader to distinguish between the 
refactoring changes and the new changes related to enabling launch-time XDP 
ZC support.

>   static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>   				       struct igc_ring *tx_ring)
>   {
> @@ -1603,26 +1623,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>   	skb->tstamp = ktime_set(0, 0);
>   	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
>   
> -	if (insert_empty) {
> -		struct igc_tx_buffer *empty_info;
> -		struct sk_buff *empty;
> -		void *data;
> -
> -		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
> -		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
> -		if (!empty)
> -			goto done;
> -
> -		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
> -		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
> -
> -		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
> -
> -		if (igc_init_tx_empty_descriptor(tx_ring,
> -						 empty,
> -						 empty_info) < 0)
> -			dev_kfree_skb_any(empty);
> -	}
> +	if (insert_empty)
> +		igc_insert_empty_packet(tx_ring);
>   
>   done:
>   	/* record the location of the first descriptor for this packet */
> @@ -2955,9 +2957,33 @@ static u64 igc_xsk_fill_timestamp(void *_priv)
>   	return *(u64 *)_priv;
>   }
>   
> +static void igc_xsk_request_launch_time(u64 launch_time, void *_priv)
> +{
> +	struct igc_metadata_request *meta_req = _priv;
> +	struct igc_ring *tx_ring = meta_req->tx_ring;
> +	__le32 launch_time_offset;
> +	bool insert_empty = false;
> +	bool first_flag = false;
> +
> +	if (!tx_ring->launchtime_enable)
> +		return;
> +
> +	launch_time_offset = igc_tx_launchtime(tx_ring,
> +					       ns_to_ktime(launch_time),
> +					       &first_flag, &insert_empty);
> +	if (insert_empty) {
> +		igc_insert_empty_packet(tx_ring);
> +		meta_req->tx_buffer =
> +			&tx_ring->tx_buffer_info[tx_ring->next_to_use];
> +	}
> +
> +	igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0);
> +}
> +
>   const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = {
>   	.tmo_request_timestamp		= igc_xsk_request_timestamp,
>   	.tmo_fill_timestamp		= igc_xsk_fill_timestamp,
> +	.tmo_request_launch_time	= igc_xsk_request_launch_time,
>   };
>   
>   static void igc_xdp_xmit_zc(struct igc_ring *ring)
> @@ -2980,7 +3006,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
>   	ntu = ring->next_to_use;
>   	budget = igc_desc_unused(ring);
>   
> -	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
> +	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) {

Could we add some explanation of what the value "4" represents and why it is used?
Choong Yong Liang Jan. 20, 2025, 7:24 a.m. UTC | #2
On 20/1/2025 2:25 pm, Abdul Rahim, Faizal wrote:
> 
> To be cautious, could we perform a stress test by sending a higher number 
> of packets with launch time? For example, we could send 200 packets, each 
> configured with a launch time, and verify that the driver continues to 
> function correctly afterward.
> 
I agree on this point. Could you perform the same stress test on the STMMAC 
driver as well?
Song Yoong Siang Jan. 20, 2025, 10:06 a.m. UTC | #3
On Monday, January 20, 2025 2:26 PM, Abdul Rahim, Faizal <faizal.abdul.rahim@linux.intel.com> wrote:
>Hi Siang.
>
>On 16/1/2025 11:53 pm, Song Yoong Siang wrote:
>> Enable Launch Time Control (LTC) support to XDP zero copy via XDP Tx
>> metadata framework.
>>
>> This patch is tested with tools/testing/selftests/bpf/xdp_hw_metadata on
>> Intel I225-LM Ethernet controller. Below are the test steps and result.
>>
>> Test Steps:
>> 1. At DUT, start xdp_hw_metadata selftest application:
>>     $ sudo ./xdp_hw_metadata enp2s0 -l 1000000000 -L 1
>>
>> 2. At Link Partner, send an UDP packet with VLAN priority 1 to port 9091 of
>>     DUT.
>>
>> When launch time is set to 1s in the future, the delta between launch time
>> and transmit hardware timestamp is equal to 0.016us, as shown in result
>> below:
>>    0x562ff5dc8880: rx_desc[4]->addr=84110 addr=84110 comp_addr=84110 EoP
>>    rx_hash: 0xE343384 with RSS type:0x1
>>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to User RX-
>time sec:0.0002 (183.103 usec)
>>    XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User RX-
>time sec:0.0001 (80.309 usec)
>>    No rx_vlan_tci or rx_vlan_proto, err=-95
>>    0x562ff5dc8880: ping-pong with csum=561c (want c7dd) csum_start=34
>csum_offset=6
>>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW
>Launch-time sec:1.0000 (1000000.000 usec)
>>    0x562ff5dc8880: complete tx idx=4 addr=4018
>>    HW Launch-time:   1734578016467548904 (sec:1734578016.4675) delta to HW
>TX-complete-time sec:0.0000 (0.016 usec)
>>    HW TX-complete-time:   1734578016467548920 (sec:1734578016.4675) delta
>to User TX-complete-time sec:0.0000 (32.546 usec)
>>    XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to User TX-
>complete-time sec:0.9999 (999929.768 usec)
>>    HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW TX-
>complete-time sec:1.0000 (1000000.016 usec)
>>    0x562ff5dc8880: complete rx idx=132 addr=84110
>
>To be cautious, could we perform a stress test by sending a higher number
>of packets with launch time? For example, we could send 200 packets, each
>configured with a launch time, and verify that the driver continues to
>function correctly afterward.
>

Hi Faizal,

Thanks for your review comments.
Sure, I can send continuous packets at a short interval and share
the results in the commit message.

>> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
>> ---
>>   drivers/net/ethernet/intel/igc/igc_main.c | 78 ++++++++++++++++-------
>>   1 file changed, 56 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c
>b/drivers/net/ethernet/intel/igc/igc_main.c
>> index 27872bdea9bd..6857f5f5b4b2 100644
>> --- a/drivers/net/ethernet/intel/igc/igc_main.c
>> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
>> @@ -1566,6 +1566,26 @@ static bool igc_request_tx_tstamp(struct igc_adapter
>*adapter, struct sk_buff *s
>>   	return false;
>>   }
>>
>> +static void igc_insert_empty_packet(struct igc_ring *tx_ring)
>> +{
>> +	struct igc_tx_buffer *empty_info;
>> +	struct sk_buff *empty;
>> +	void *data;
>> +
>> +	empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
>> +	empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
>> +	if (!empty)
>> +		return;
>> +
>> +	data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
>> +	memset(data, 0, IGC_EMPTY_FRAME_SIZE);
>> +
>> +	igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
>> +
>> +	if (igc_init_tx_empty_descriptor(tx_ring, empty, empty_info) < 0)
>> +		dev_kfree_skb_any(empty);
>> +}
>> +
>
>The function igc_insert_empty_packet() appears to wrap existing code to
>enhance reusability, with no new changes related to enabling launch-time
>XDP ZC functionality. If so, could we split this into a separate commit?
>This would make it clearer for the reader to distinguish between the
>refactoring changes and the new changes related to enabling launch-time XDP
>ZC support.
>

I am ok to split the patch into two. Will do it on next version submission.

>>   static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>>   				       struct igc_ring *tx_ring)
>>   {
>> @@ -1603,26 +1623,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct
>sk_buff *skb,
>>   	skb->tstamp = ktime_set(0, 0);
>>   	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag,
>&insert_empty);
>>
>> -	if (insert_empty) {
>> -		struct igc_tx_buffer *empty_info;
>> -		struct sk_buff *empty;
>> -		void *data;
>> -
>> -		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
>> -		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
>> -		if (!empty)
>> -			goto done;
>> -
>> -		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
>> -		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
>> -
>> -		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
>> -
>> -		if (igc_init_tx_empty_descriptor(tx_ring,
>> -						 empty,
>> -						 empty_info) < 0)
>> -			dev_kfree_skb_any(empty);
>> -	}
>> +	if (insert_empty)
>> +		igc_insert_empty_packet(tx_ring);
>>
>>   done:
>>   	/* record the location of the first descriptor for this packet */
>> @@ -2955,9 +2957,33 @@ static u64 igc_xsk_fill_timestamp(void *_priv)
>>   	return *(u64 *)_priv;
>>   }
>>
>> +static void igc_xsk_request_launch_time(u64 launch_time, void *_priv)
>> +{
>> +	struct igc_metadata_request *meta_req = _priv;
>> +	struct igc_ring *tx_ring = meta_req->tx_ring;
>> +	__le32 launch_time_offset;
>> +	bool insert_empty = false;
>> +	bool first_flag = false;
>> +
>> +	if (!tx_ring->launchtime_enable)
>> +		return;
>> +
>> +	launch_time_offset = igc_tx_launchtime(tx_ring,
>> +					       ns_to_ktime(launch_time),
>> +					       &first_flag, &insert_empty);
>> +	if (insert_empty) {
>> +		igc_insert_empty_packet(tx_ring);
>> +		meta_req->tx_buffer =
>> +			&tx_ring->tx_buffer_info[tx_ring->next_to_use];
>> +	}
>> +
>> +	igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0);
>> +}
>> +
>>   const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = {
>>   	.tmo_request_timestamp		= igc_xsk_request_timestamp,
>>   	.tmo_fill_timestamp		= igc_xsk_fill_timestamp,
>> +	.tmo_request_launch_time	= igc_xsk_request_launch_time,
>>   };
>>
>>   static void igc_xdp_xmit_zc(struct igc_ring *ring)
>> @@ -2980,7 +3006,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
>>   	ntu = ring->next_to_use;
>>   	budget = igc_desc_unused(ring);
>>
>> -	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
>> +	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) {
>
>Could we add some explanation on what & why the value "4" is used ?

This is because a packet with launch time needs 2 descriptors, and the same
goes for the empty packet. Thus, 4 descriptors are needed in total. I will
add a detailed explanation.

Thanks & Regards
Siang
Song Yoong Siang Jan. 20, 2025, 10:08 a.m. UTC | #4
On Monday, January 20, 2025 3:25 PM, Choong Yong Liang <yong.liang.choong@linux.intel.com> wrote:
>On 20/1/2025 2:25 pm, Abdul Rahim, Faizal wrote:
>>
>> To be cautious, could we perform a stress test by sending a higher number
>> of packets with launch time? For example, we could send 200 packets, each
>> configured with a launch time, and verify that the driver continues to
>> function correctly afterward.
>>
>I agree on this point. Could you perform the same stress test on the STMMAC
>driver as well?

Hi Yong Liang,

Sure. I will perform the same tests on stmmac and share the results.

Thanks & Regards
Siang
Zdenek Bouska Jan. 23, 2025, 3:40 p.m. UTC | #5
Hi Siang,

I tested this patch series on 6.13 with Intel I226-LM (rev 04).

I also applied patch "selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata" [1]
and "selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata" [2] so that TX timestamps
work.

HW RX-timestamp was small (0.5956 instead of 1737373125.5956):

HW RX-time:   595572448 (sec:0.5956) delta to User RX-time sec:1737373124.9873 (1737373124987318.750 usec)
XDP RX-time:   1737373125582798388 (sec:1737373125.5828) delta to User RX-time sec:0.0001 (92.733 usec)

Igc's raw HW RX-timestamp in front of frame data was overwritten by BPF program on
line 90 in tools/testing/selftests/bpf: meta->hint_valid = 0;

"HW timestamp has been copied into local variable" comment is outdated on
line 2813 in drivers/net/ethernet/intel/igc/igc_main.c after
commit 069b142f5819 igc: Add support for PTP .getcyclesx64() [3].

Workaround is to add unused data to xdp_meta struct:

--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -49,4 +49,5 @@ struct xdp_meta {
                __s32 rx_vlan_tag_err;
        };
        enum xdp_meta_field hint_valid;
+       __u8 avoid_IGC_TS_HDR_LEN[16];
 };

But Launch time still does not work:

HW Launch-time:   1737374407515922696 (sec:1737374407.5159) delta to HW TX-complete-time sec:-0.9999 (-999923.649 usec)

Command "sudo ethtool -X enp1s0 start 1 equal 1" was in v4 [4] but is not in v6.
Was that intentional? After executing it Launch time feature works:

HW Launch-time:   1737374618088557111 (sec:1737374618.0886) delta to HW TX-complete-time sec:0.0000 (0.012 usec)

Thank you for XDP launch time support!

[1] https://lore.kernel.org/linux-kernel/20241205044258.3155799-1-yoong.siang.song@intel.com/
[2] https://lore.kernel.org/linux-kernel/20241205051936.3156307-1-yoong.siang.song@intel.com/
[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=069b142f58196bd9f47b35e493255741e2c663c7
[4] https://lore.kernel.org/linux-kernel/20250106135724.9749-1-yoong.siang.song@intel.com/

Best regards,
Zdenek Bouska

--
Siemens, s.r.o
Foundational Technologies

> -----Original Message-----
> From: Song Yoong Siang <yoong.siang.song@intel.com>
> Sent: Thursday, January 16, 2025 4:54 PM
> To: David S . Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Simon Horman <horms@kernel.org>; Willem de Bruijn
> <willemb@google.com>; Bezdeka, Florian (FT RPD CED OES-DE)
> <florian.bezdeka@siemens.com>; Donald Hunter
> <donald.hunter@gmail.com>; Jonathan Corbet <corbet@lwn.net>; Bjorn
> Topel <bjorn@kernel.org>; Magnus Karlsson <magnus.karlsson@intel.com>;
> Maciej Fijalkowski <maciej.fijalkowski@intel.com>; Jonathan Lemon
> <jonathan.lemon@gmail.com>; Andrew Lunn <andrew+netdev@lunn.ch>;
> Alexei Starovoitov <ast@kernel.org>; Daniel Borkmann
> <daniel@iogearbox.net>; Jesper Dangaard Brouer <hawk@kernel.org>; John
> Fastabend <john.fastabend@gmail.com>; Joe Damato <jdamato@fastly.com>;
> Stanislav Fomichev <sdf@fomichev.me>; Xuan Zhuo
> <xuanzhuo@linux.alibaba.com>; Mina Almasry <almasrymina@google.com>;
> Daniel Jurgens <danielj@nvidia.com>; Song Yoong Siang
> <yoong.siang.song@intel.com>; Andrii Nakryiko <andrii@kernel.org>; Eduard
> Zingerman <eddyz87@gmail.com>; Mykola Lysenko <mykolal@fb.com>;
> Martin KaFai Lau <martin.lau@linux.dev>; Song Liu <song@kernel.org>;
> Yonghong Song <yonghong.song@linux.dev>; KP Singh
> <kpsingh@kernel.org>; Hao Luo <haoluo@google.com>; Jiri Olsa
> <jolsa@kernel.org>; Shuah Khan <shuah@kernel.org>; Alexandre Torgue
> <alexandre.torgue@foss.st.com>; Jose Abreu <joabreu@synopsys.com>;
> Maxime Coquelin <mcoquelin.stm32@gmail.com>; Tony Nguyen
> <anthony.l.nguyen@intel.com>; Przemek Kitszel
> <przemyslaw.kitszel@intel.com>
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-
> doc@vger.kernel.org; bpf@vger.kernel.org; linux-kselftest@vger.kernel.org;
> linux-stm32@st-md-mailman.stormreply.com; linux-arm-
> kernel@lists.infradead.org; intel-wired-lan@lists.osuosl.org; xdp-hints@xdp-
> project.net
> Subject: [PATCH bpf-next v6 4/4] igc: Add launch time support to XDP ZC
> 
> Enable Launch Time Control (LTC) support to XDP zero copy via XDP Tx
> metadata framework.
> 
> This patch is tested with tools/testing/selftests/bpf/xdp_hw_metadata on
> Intel I225-LM Ethernet controller. Below are the test steps and result.
> 
> Test Steps:
> 1. At DUT, start xdp_hw_metadata selftest application:
>    $ sudo ./xdp_hw_metadata enp2s0 -l 1000000000 -L 1
> 
> 2. At Link Partner, send an UDP packet with VLAN priority 1 to port 9091 of
>    DUT.
> 
> When launch time is set to 1s in the future, the delta between launch time and
> transmit hardware timestamp is equal to 0.016us, as shown in result
> below:
>   0x562ff5dc8880: rx_desc[4]->addr=84110 addr=84110 comp_addr=84110
> EoP
>   rx_hash: 0xE343384 with RSS type:0x1
>   HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to
> User RX-time sec:0.0002 (183.103 usec)
>   XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to
> User RX-time sec:0.0001 (80.309 usec)
>   No rx_vlan_tci or rx_vlan_proto, err=-95
>   0x562ff5dc8880: ping-pong with csum=561c (want c7dd) csum_start=34
> csum_offset=6
>   HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW
> Launch-time sec:1.0000 (1000000.000 usec)
>   0x562ff5dc8880: complete tx idx=4 addr=4018
>   HW Launch-time:   1734578016467548904 (sec:1734578016.4675) delta
> to HW TX-complete-time sec:0.0000 (0.016 usec)
>   HW TX-complete-time:   1734578016467548920 (sec:1734578016.4675)
> delta to User TX-complete-time sec:0.0000 (32.546 usec)
>   XDP RX-time:   1734578015467651698 (sec:1734578015.4677) delta to
> User TX-complete-time sec:0.9999 (999929.768 usec)
>   HW RX-time:   1734578015467548904 (sec:1734578015.4675) delta to HW
> TX-complete-time sec:1.0000 (1000000.016 usec)
>   0x562ff5dc8880: complete rx idx=132 addr=84110
> 
> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
> ---
>  drivers/net/ethernet/intel/igc/igc_main.c | 78 ++++++++++++++++-------
>  1 file changed, 56 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c
> b/drivers/net/ethernet/intel/igc/igc_main.c
> index 27872bdea9bd..6857f5f5b4b2 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -1566,6 +1566,26 @@ static bool igc_request_tx_tstamp(struct
> igc_adapter *adapter, struct sk_buff *s
>  	return false;
>  }
> 
> +static void igc_insert_empty_packet(struct igc_ring *tx_ring) {
> +	struct igc_tx_buffer *empty_info;
> +	struct sk_buff *empty;
> +	void *data;
> +
> +	empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
> +	empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
> +	if (!empty)
> +		return;
> +
> +	data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
> +	memset(data, 0, IGC_EMPTY_FRAME_SIZE);
> +
> +	igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
> +
> +	if (igc_init_tx_empty_descriptor(tx_ring, empty, empty_info) < 0)
> +		dev_kfree_skb_any(empty);
> +}
> +
>  static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>  				       struct igc_ring *tx_ring)
>  {
> @@ -1603,26 +1623,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct
> sk_buff *skb,
>  	skb->tstamp = ktime_set(0, 0);
>  	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag,
> &insert_empty);
> 
> -	if (insert_empty) {
> -		struct igc_tx_buffer *empty_info;
> -		struct sk_buff *empty;
> -		void *data;
> -
> -		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
> -		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
> -		if (!empty)
> -			goto done;
> -
> -		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
> -		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
> -
> -		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
> -
> -		if (igc_init_tx_empty_descriptor(tx_ring,
> -						 empty,
> -						 empty_info) < 0)
> -			dev_kfree_skb_any(empty);
> -	}
> +	if (insert_empty)
> +		igc_insert_empty_packet(tx_ring);
> 
>  done:
>  	/* record the location of the first descriptor for this packet */ @@ -
> 2955,9 +2957,33 @@ static u64 igc_xsk_fill_timestamp(void *_priv)
>  	return *(u64 *)_priv;
>  }
> 
> +static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) {
> +	struct igc_metadata_request *meta_req = _priv;
> +	struct igc_ring *tx_ring = meta_req->tx_ring;
> +	__le32 launch_time_offset;
> +	bool insert_empty = false;
> +	bool first_flag = false;
> +
> +	if (!tx_ring->launchtime_enable)
> +		return;
> +
> +	launch_time_offset = igc_tx_launchtime(tx_ring,
> +					       ns_to_ktime(launch_time),
> +					       &first_flag, &insert_empty);
> +	if (insert_empty) {
> +		igc_insert_empty_packet(tx_ring);
> +		meta_req->tx_buffer =
> +			&tx_ring->tx_buffer_info[tx_ring->next_to_use];
> +	}
> +
> +	igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); }
> +
>  const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = {
>  	.tmo_request_timestamp		= igc_xsk_request_timestamp,
>  	.tmo_fill_timestamp		= igc_xsk_fill_timestamp,
> +	.tmo_request_launch_time	= igc_xsk_request_launch_time,
>  };
> 
>  static void igc_xdp_xmit_zc(struct igc_ring *ring) @@ -2980,7 +3006,7 @@
> static void igc_xdp_xmit_zc(struct igc_ring *ring)
>  	ntu = ring->next_to_use;
>  	budget = igc_desc_unused(ring);
> 
> -	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
> +	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) {
>  		struct igc_metadata_request meta_req;
>  		struct xsk_tx_metadata *meta = NULL;
>  		struct igc_tx_buffer *bi;
> @@ -3004,6 +3030,12 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
>  		xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops,
>  					&meta_req);
> 
> +		/* xsk_tx_metadata_request() may have updated next_to_use
> */
> +		ntu = ring->next_to_use;
> +
> +		/* xsk_tx_metadata_request() may have updated Tx buffer
> info */
> +		bi = meta_req.tx_buffer;
> +
>  		tx_desc = IGC_TX_DESC(ring, ntu);
>  		tx_desc->read.cmd_type_len =
> cpu_to_le32(meta_req.cmd_type);
>  		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
> @@ -3021,9 +3053,11 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
>  		ntu++;
>  		if (ntu == ring->count)
>  			ntu = 0;
> +
> +		ring->next_to_use = ntu;
> +		budget = igc_desc_unused(ring);
>  	}
> 
> -	ring->next_to_use = ntu;
>  	if (tx_desc) {
>  		igc_flush_tx_descriptors(ring);
>  		xsk_tx_release(pool);
> --
> 2.34.1
>
Song Yoong Siang Jan. 23, 2025, 4:41 p.m. UTC | #6
On Thursday, January 23, 2025 11:40 PM, Bouska, Zdenek <zdenek.bouska@siemens.com> wrote:
>
>Hi Siang,
>
>I tested this patch series on 6.13 with Intel I226-LM (rev 04).
>
>I also applied patch "selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata" [1]
>and "selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata" [2] so that TX timestamps
>work.
>
>HW RX-timestamp was small (0.5956 instead of 1737373125.5956):
>
>HW RX-time:   595572448 (sec:0.5956) delta to User RX-time sec:1737373124.9873 (1737373124987318.750 usec)
>XDP RX-time:   1737373125582798388 (sec:1737373125.5828) delta to User RX-time sec:0.0001 (92.733 usec)
>
>Igc's raw HW RX-timestamp in front of frame data was overwritten by BPF program on
>line 90 in tools/testing/selftests/bpf: meta->hint_valid = 0;
>
>"HW timestamp has been copied into local variable" comment is outdated on
>line 2813 in drivers/net/ethernet/intel/igc/igc_main.c after
>commit 069b142f5819 igc: Add support for PTP .getcyclesx64() [3].
>
>Workaround is to add unused data to xdp_meta struct:
>
>--- a/tools/testing/selftests/bpf/xdp_metadata.h
>+++ b/tools/testing/selftests/bpf/xdp_metadata.h
>@@ -49,4 +49,5 @@ struct xdp_meta {
>                __s32 rx_vlan_tag_err;
>        };
>        enum xdp_meta_field hint_valid;
>+       __u8 avoid_IGC_TS_HDR_LEN[16];
> };
>

Hi Zdenek Bouska, 

Thanks for your help on testing this patch set.
You are right, there is an issue with the Rx HW timestamp.
I will submit a bug-fix patch when the solution is finalized,
but the fix will not be part of this launch time patch set.
Until then, you can continue to use your workaround.

>But Launch time still does not work:
>
>HW Launch-time:   1737374407515922696 (sec:1737374407.5159) delta to HW TX-complete-time sec:-0.9999 (-999923.649 usec)
>
>Command "sudo ethtool -X enp1s0 start 1 equal 1" was in v4 [4] but is not in v6.
>Was that intentional? After executing it Launch time feature works:

This ethtool command uses the RSS method to route the incoming packet
to the queue which has launch time enabled. However, not every device supports
RSS, so I moved to a more generic method, the VLAN priority method,
to route the incoming packet. Therefore, you need to send a
UDP packet with VLAN priority 1 to port 9091 of the DUT.

Below is an example Python script that generates the VLAN UDP packet.
You can give it a quick try.

from scapy.all import *
from scapy.all import Ether, Dot1Q, IP, UDP
packet = Ether(src="44:ab:bc:bb:21:44", dst="22:ab:bc:bb:12:34") / Dot1Q(vlan=100, prio=1) / IP(src="169.254.1.2", dst="169.254.1.1") / UDP(dport=9091)
sendp(packet, iface="enp1s0")

Thanks & Regards
Siang

>
>HW Launch-time:   1737374618088557111 (sec:1737374618.0886) delta to HW TX-complete-time sec:0.0000 (0.012 usec)
>
>Thank you for XDP launch time support!
>
>[1] https://lore.kernel.org/linux-kernel/20241205044258.3155799-1-
>yoong.siang.song@intel.com/
>[2] https://lore.kernel.org/linux-kernel/20241205051936.3156307-1-
>yoong.siang.song@intel.com/
>[3]
>https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=069
>b142f58196bd9f47b35e493255741e2c663c7
>[4] https://lore.kernel.org/linux-kernel/20250106135724.9749-1-
>yoong.siang.song@intel.com/
>
>Best regards,
>Zdenek Bouska
>
>--
>Siemens, s.r.o
>Foundational Technologies
>
Florian Bezdeka Jan. 23, 2025, 5:24 p.m. UTC | #7
Hi all,

On Thu, 2025-01-23 at 16:41 +0000, Song, Yoong Siang wrote:
> On Thursday, January 23, 2025 11:40 PM, Bouska, Zdenek <zdenek.bouska@siemens.com> wrote:
> > 
> > Hi Siang,
> > 
> > I tested this patch series on 6.13 with Intel I226-LM (rev 04).
> > 
> > I also applied patch "selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata" [1]
> > and "selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata" [2] so that TX timestamps
> > work.
> > 
> > HW RX-timestamp was small (0.5956 instead of 1737373125.5956):
> > 
> > HW RX-time:   595572448 (sec:0.5956) delta to User RX-time sec:1737373124.9873 (1737373124987318.750 usec)
> > XDP RX-time:   1737373125582798388 (sec:1737373125.5828) delta to User RX-time sec:0.0001 (92.733 usec)
> > 
> > Igc's raw HW RX-timestamp in front of frame data was overwritten by BPF program on
> > line 90 in tools/testing/selftests/bpf: meta->hint_valid = 0;
> > 
> > "HW timestamp has been copied into local variable" comment is outdated on
> > line 2813 in drivers/net/ethernet/intel/igc/igc_main.c after
> > commit 069b142f5819 igc: Add support for PTP .getcyclesx64() [3].
> > 
> > Workaround is to add unused data to xdp_meta struct:
> > 
> > --- a/tools/testing/selftests/bpf/xdp_metadata.h
> > +++ b/tools/testing/selftests/bpf/xdp_metadata.h
> > @@ -49,4 +49,5 @@ struct xdp_meta {
> >                __s32 rx_vlan_tag_err;
> >        };
> >        enum xdp_meta_field hint_valid;
> > +       __u8 avoid_IGC_TS_HDR_LEN[16];
> > };
> > 
> 
> Hi Zdenek Bouska, 
> 
> Thanks for your help on testing this patch set.
> You are right, there is some issue with the Rx hw timestamp,
> I will submit the bug fix patch when the solution is finalized,
> but the fix will not be part of this launch time patch set.
> Until then, you can continue to use your WA.

I think there is no simple fix for that. That needs some discussion
around the "expectations" for the headroom / meta data area in front of
the actual packet data.

To be able to write generic BPF programs - generic in terms of "works
with all drivers" - the headroom is expected to be available for use
inside the BPF program.

I think that is true for most drivers / devices, but at least igc is
different in this regard. igc devices deliver the RX timestamp in front of
the actual data, while other devices deliver the meta information as
part of the RX descriptor.

For igc we get:

+----------+-----------------+-----+------+
| headroom | custom metadata |RX TS| data |
+----------+-----------------+-----+------+
           ^                       ^
           |                       |
 xdp_buff->data_meta        xdp_buff->data


The only information the application gets is a pointer to the start of
the data section. For calculating / finding the beginning of the meta
data area the application has to go backward.

That is exactly how it is currently implemented in the selftest.

Problem: By writing into the calculated meta data area the BPF program
might already destroy meta information delivered by the driver. At
least for igc this is a problem.

I hope that was clear...

Best regards,
Florian

> 
> > But Launch time still does not work:
> > 
> > HW Launch-time:   1737374407515922696 (sec:1737374407.5159) delta to HW TX-complete-time sec:-0.9999 (-999923.649 usec)
> > 
> > Command "sudo ethtool -X enp1s0 start 1 equal 1" was in v4 [4] but is not in v6.
> > Was that intentional? After executing it Launch time feature works:
> 
> This ethtool command is to use RSS method to route the incoming packet
> to the queue which has launch time enabled. However, not every device support
> RSS. So I move to use a more generic method, which is vlan priority method,
> to route the incoming packet. Therefore, you need to send an
> UDP packet with VLAN priority 1 to port 9091 of DUT.
> 
> Below is example of my python script to generate the vlan UDP packet.
> You can have a quick try on it.
> 
> from scapy.all import *
> from scapy.all import Ether, Dot1Q, IP, UDP
> packet = Ether(src="44:ab:bc:bb:21:44", dst="22:ab:bc:bb:12:34") / Dot1Q(vlan=100, prio=1) / IP(src="169.254.1.2", dst="169.254.1.1") / UDP(dport=9091)
> sendp(packet, iface="enp1s0")
> 
> Thanks & Regards
> Siang
> 
> > 
> > HW Launch-time:   1737374618088557111 (sec:1737374618.0886) delta to HW TX-complete-time sec:0.0000 (0.012 usec)
> > 
> > Thank you for XDP launch time support!
> > 
> > [1] https://lore.kernel.org/linux-kernel/20241205044258.3155799-1-
> > yoong.siang.song@intel.com/
> > [2] https://lore.kernel.org/linux-kernel/20241205051936.3156307-1-
> > yoong.siang.song@intel.com/
> > [3]
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=069
> > b142f58196bd9f47b35e493255741e2c663c7
> > [4] https://lore.kernel.org/linux-kernel/20250106135724.9749-1-
> > yoong.siang.song@intel.com/
> > 
> > Best regards,
> > Zdenek Bouska
> > 
> > --
> > Siemens, s.r.o
> > Foundational Technologies
> >
Stanislav Fomichev Jan. 23, 2025, 7:49 p.m. UTC | #8
On 01/23, Florian Bezdeka wrote:
> Hi all,
> 
> On Thu, 2025-01-23 at 16:41 +0000, Song, Yoong Siang wrote:
> > On Thursday, January 23, 2025 11:40 PM, Bouska, Zdenek <zdenek.bouska@siemens.com> wrote:
> > > 
> > > Hi Siang,
> > > 
> > > I tested this patch series on 6.13 with Intel I226-LM (rev 04).
> > > 
> > > I also applied patch "selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata" [1]
> > > and "selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata" [2] so that TX timestamps
> > > work.
> > > 
> > > HW RX-timestamp was small (0.5956 instead of 1737373125.5956):
> > > 
> > > HW RX-time:   595572448 (sec:0.5956) delta to User RX-time sec:1737373124.9873 (1737373124987318.750 usec)
> > > XDP RX-time:   1737373125582798388 (sec:1737373125.5828) delta to User RX-time sec:0.0001 (92.733 usec)
> > > 
> > > Igc's raw HW RX-timestamp in front of frame data was overwritten by BPF program on
> > > line 90 in tools/testing/selftests/bpf: meta->hint_valid = 0;
> > > 
> > > "HW timestamp has been copied into local variable" comment is outdated on
> > > line 2813 in drivers/net/ethernet/intel/igc/igc_main.c after
> > > commit 069b142f5819 igc: Add support for PTP .getcyclesx64() [3].
> > > 
> > > Workaround is to add unused data to xdp_meta struct:
> > > 
> > > --- a/tools/testing/selftests/bpf/xdp_metadata.h
> > > +++ b/tools/testing/selftests/bpf/xdp_metadata.h
> > > @@ -49,4 +49,5 @@ struct xdp_meta {
> > >                __s32 rx_vlan_tag_err;
> > >        };
> > >        enum xdp_meta_field hint_valid;
> > > +       __u8 avoid_IGC_TS_HDR_LEN[16];
> > > };
> > > 
> > 
> > Hi Zdenek Bouska, 
> > 
> > Thanks for your help on testing this patch set.
> > You are right, there is some issue with the Rx hw timestamp,
> > I will submit the bug fix patch when the solution is finalized,
> > but the fix will not be part of this launch time patch set.
> > Until then, you can continue to use your WA.
> 
> I think there is no simple fix for that. That needs some discussion
> around the "expectations" to the headroom / meta data area in front of
> the actual packet data.

By 'simple' you mean without some new UAPI to signal the size of that
'reserved area' by the driver? I don't see any other easy way out either :-/
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 27872bdea9bd..6857f5f5b4b2 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1566,6 +1566,26 @@  static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *s
 	return false;
 }
 
+static void igc_insert_empty_packet(struct igc_ring *tx_ring)
+{
+	struct igc_tx_buffer *empty_info;
+	struct sk_buff *empty;
+	void *data;
+
+	empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
+	if (!empty)
+		return;
+
+	data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
+	memset(data, 0, IGC_EMPTY_FRAME_SIZE);
+
+	igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
+
+	if (igc_init_tx_empty_descriptor(tx_ring, empty, empty_info) < 0)
+		dev_kfree_skb_any(empty);
+}
+
 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 				       struct igc_ring *tx_ring)
 {
@@ -1603,26 +1623,8 @@  static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 	skb->tstamp = ktime_set(0, 0);
 	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
 
-	if (insert_empty) {
-		struct igc_tx_buffer *empty_info;
-		struct sk_buff *empty;
-		void *data;
-
-		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
-		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
-		if (!empty)
-			goto done;
-
-		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
-		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
-
-		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
-
-		if (igc_init_tx_empty_descriptor(tx_ring,
-						 empty,
-						 empty_info) < 0)
-			dev_kfree_skb_any(empty);
-	}
+	if (insert_empty)
+		igc_insert_empty_packet(tx_ring);
 
 done:
 	/* record the location of the first descriptor for this packet */
@@ -2955,9 +2957,33 @@  static u64 igc_xsk_fill_timestamp(void *_priv)
 	return *(u64 *)_priv;
 }
 
+static void igc_xsk_request_launch_time(u64 launch_time, void *_priv)
+{
+	struct igc_metadata_request *meta_req = _priv;
+	struct igc_ring *tx_ring = meta_req->tx_ring;
+	__le32 launch_time_offset;
+	bool insert_empty = false;
+	bool first_flag = false;
+
+	if (!tx_ring->launchtime_enable)
+		return;
+
+	launch_time_offset = igc_tx_launchtime(tx_ring,
+					       ns_to_ktime(launch_time),
+					       &first_flag, &insert_empty);
+	if (insert_empty) {
+		igc_insert_empty_packet(tx_ring);
+		meta_req->tx_buffer =
+			&tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	}
+
+	igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0);
+}
+
 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = {
 	.tmo_request_timestamp		= igc_xsk_request_timestamp,
 	.tmo_fill_timestamp		= igc_xsk_fill_timestamp,
+	.tmo_request_launch_time	= igc_xsk_request_launch_time,
 };
 
 static void igc_xdp_xmit_zc(struct igc_ring *ring)
@@ -2980,7 +3006,7 @@  static void igc_xdp_xmit_zc(struct igc_ring *ring)
 	ntu = ring->next_to_use;
 	budget = igc_desc_unused(ring);
 
-	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) {
 		struct igc_metadata_request meta_req;
 		struct xsk_tx_metadata *meta = NULL;
 		struct igc_tx_buffer *bi;
@@ -3004,6 +3030,12 @@  static void igc_xdp_xmit_zc(struct igc_ring *ring)
 		xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops,
 					&meta_req);
 
+		/* xsk_tx_metadata_request() may have updated next_to_use */
+		ntu = ring->next_to_use;
+
+		/* xsk_tx_metadata_request() may have updated Tx buffer info */
+		bi = meta_req.tx_buffer;
+
 		tx_desc = IGC_TX_DESC(ring, ntu);
 		tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type);
 		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
@@ -3021,9 +3053,11 @@  static void igc_xdp_xmit_zc(struct igc_ring *ring)
 		ntu++;
 		if (ntu == ring->count)
 			ntu = 0;
+
+		ring->next_to_use = ntu;
+		budget = igc_desc_unused(ring);
 	}
 
-	ring->next_to_use = ntu;
 	if (tx_desc) {
 		igc_flush_tx_descriptors(ring);
 		xsk_tx_release(pool);