diff mbox series

[RESEND,bpf-next,05/15] ice: Introduce ice_xdp_buff

Message ID 20230512152607.992209-6-larysa.zaremba@intel.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series new kfunc XDP hints and ice implementation | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 4 maintainers not CCed: hawk@kernel.org edumazet@google.com davem@davemloft.net pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 10 this patch: 10
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 97 lines checked
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-20 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc

Commit Message

Larysa Zaremba May 12, 2023, 3:25 p.m. UTC
In order to use XDP hints via kfuncs we need to put
RX descriptor and ring pointers just next to xdp_buff.
Same as in hints implementations in other drivers, we archieve
this through putting xdp_buff into a child structure.

Currently, xdp_buff is stored in the ring structure,
so replace it with union that includes child structure.
This way enough memory is available while existing XDP code
remains isolated from hints.

Size of the new child structure (ice_xdp_buff) is 72 bytes,
therefore it does not fit into a single cache line.
To at least place union at the start of cache line, move 'next'
field from CL3 to CL1, as it isn't used often.

Placing union at the start of cache line makes at least xdp_buff
and descriptor fit into a single CL,
ring pointer is used less often, so it can spill into the next CL.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 ++++--
 drivers/net/ethernet/intel/ice/ice_txrx.h     | 23 ++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 11 +++++++++
 3 files changed, 36 insertions(+), 5 deletions(-)

Comments

Alexander Lobakin May 22, 2023, 4:46 p.m. UTC | #1
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Fri, 12 May 2023 17:25:57 +0200

> In order to use XDP hints via kfuncs we need to put
> RX descriptor and ring pointers just next to xdp_buff.
> Same as in hints implementations in other drivers, we archieve
> this through putting xdp_buff into a child structure.
> 
> Currently, xdp_buff is stored in the ring structure,
> so replace it with union that includes child structure.
> This way enough memory is available while existing XDP code
> remains isolated from hints.
> 
> Size of the new child structure (ice_xdp_buff) is 72 bytes,
> therefore it does not fit into a single cache line.
> To at least place union at the start of cache line, move 'next'
> field from CL3 to CL1, as it isn't used often.
> 
> Placing union at the start of cache line makes at least xdp_buff
> and descriptor fit into a single CL,
> ring pointer is used less often, so it can spill into the next CL.

Spill or span?

> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 ++++--
>  drivers/net/ethernet/intel/ice/ice_txrx.h     | 23 ++++++++++++++++---
>  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 11 +++++++++
>  3 files changed, 36 insertions(+), 5 deletions(-)

[...]

> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -260,6 +260,15 @@ enum ice_rx_dtype {
>  	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
>  };
>  
> +struct ice_xdp_buff {
> +	struct xdp_buff xdp_buff;
> +	union ice_32b_rx_flex_desc *eop_desc;	/* Required for all metadata */

Probably can be const here as well after changing all the places
appropriately -- I don't think you write to it anywhere.

> +	/* End of the 1st cache line */
> +	struct ice_rx_ring *rx_ring;

Can't we get rid of ring dependency? Maybe there's only a couple fields
that could be copied here instead of referencing the ring? I just find
it weird that our drivers often look for something in the ring structure
to parse a descriptor ._.
If not, can't it be const?

> +};
> +
> +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
> +
>  /* indices into GLINT_ITR registers */
>  #define ICE_RX_ITR	ICE_IDX_ITR0
>  #define ICE_TX_ITR	ICE_IDX_ITR1
> @@ -301,7 +310,6 @@ enum ice_dynamic_itr {
>  /* descriptor ring, associated with a VSI */
>  struct ice_rx_ring {
>  	/* CL1 - 1st cacheline starts here */
> -	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
>  	void *desc;			/* Descriptor ring memory */
>  	struct device *dev;		/* Used for DMA mapping */
>  	struct net_device *netdev;	/* netdev ring maps to */
> @@ -313,12 +321,19 @@ struct ice_rx_ring {
>  	u16 count;			/* Number of descriptors */
>  	u16 reg_idx;			/* HW register index of the ring */
>  	u16 next_to_alloc;
> -	/* CL2 - 2nd cacheline starts here */
> +
>  	union {
>  		struct ice_rx_buf *rx_buf;
>  		struct xdp_buff **xdp_buf;
>  	};
> -	struct xdp_buff xdp;
> +	/* CL2 - 2nd cacheline starts here
> +	 * Size of ice_xdp_buff is 72 bytes,
> +	 * so it spills into CL3
> +	 */
> +	union {
> +		struct ice_xdp_buff xdp_ext;
> +		struct xdp_buff xdp;
> +	};

...or you can leave just one xdp_ext (naming it just "xdp") -- for now,
this union does literally nothing, as xdp_ext contains xdp at its very
beginning.

>  	/* CL3 - 3rd cacheline starts here */
>  	struct bpf_prog *xdp_prog;
>  	u16 rx_offset;
> @@ -328,6 +343,8 @@ struct ice_rx_ring {
>  	u16 next_to_clean;
>  	u16 first_desc;
>  
> +	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */

It can be placed even farther, somewhere near rcu_head -- IIRC it's not
used anywhere on hotpath. Even ::ring_stats below is hotter.

> +
>  	/* stats structs */
>  	struct ice_ring_stats *ring_stats;
>  
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> index e1d49e1235b3..2835a8348237 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> @@ -151,4 +151,15 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
>  		       struct sk_buff *skb);
>  void
>  ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
> +
> +static inline void
> +ice_xdp_set_meta_srcs(struct xdp_buff *xdp,

Not sure about the naming... But can't propose anything :clownface:
ice_xdp_init_buff()? Like xdp_init_buff(), but ice_xdp_buff :D

> +		      union ice_32b_rx_flex_desc *eop_desc,
> +		      struct ice_rx_ring *rx_ring)
> +{
> +	struct ice_xdp_buff *xdp_ext = (struct ice_xdp_buff *)xdp;

I'd use container_of(), even though it will do the same thing here.
BTW, is having &xdp_buff at offset 0 still a requirement?

> +
> +	xdp_ext->eop_desc = eop_desc;
> +	xdp_ext->rx_ring = rx_ring;
> +}
>  #endif /* !_ICE_TXRX_LIB_H_ */

Thanks,
Olek
Larysa Zaremba May 23, 2023, 8:02 a.m. UTC | #2
On Mon, May 22, 2023 at 06:46:40PM +0200, Alexander Lobakin wrote:
> From: Larysa Zaremba <larysa.zaremba@intel.com>
> Date: Fri, 12 May 2023 17:25:57 +0200
> 
> > In order to use XDP hints via kfuncs we need to put
> > RX descriptor and ring pointers just next to xdp_buff.
> > Same as in hints implementations in other drivers, we archieve
> > this through putting xdp_buff into a child structure.
> > 
> > Currently, xdp_buff is stored in the ring structure,
> > so replace it with union that includes child structure.
> > This way enough memory is available while existing XDP code
> > remains isolated from hints.
> > 
> > Size of the new child structure (ice_xdp_buff) is 72 bytes,
> > therefore it does not fit into a single cache line.
> > To at least place union at the start of cache line, move 'next'
> > field from CL3 to CL1, as it isn't used often.
> > 
> > Placing union at the start of cache line makes at least xdp_buff
> > and descriptor fit into a single CL,
> > ring pointer is used less often, so it can spill into the next CL.
> 
> Spill or span?

I guess 'span' is the better word.

> 
> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 ++++--
> >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 23 ++++++++++++++++---
> >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 11 +++++++++
> >  3 files changed, 36 insertions(+), 5 deletions(-)
> 
> [...]
> 
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > @@ -260,6 +260,15 @@ enum ice_rx_dtype {
> >  	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
> >  };
> >  
> > +struct ice_xdp_buff {
> > +	struct xdp_buff xdp_buff;
> > +	union ice_32b_rx_flex_desc *eop_desc;	/* Required for all metadata */
> 
> Probably can be const here as well after changing all the places
> appropriately -- I don't think you write to it anywhere.

Correct.

> 
> > +	/* End of the 1st cache line */
> > +	struct ice_rx_ring *rx_ring;
> 
> Can't we get rid of ring dependency? Maybe there's only a couple fields
> that could be copied here instead of referencing the ring? I just find
> it weird that our drivers often look for something in the ring structure
> to parse a descriptor ._.
> If not, can't it be const?

You're right, I could put just rx_ring->cached_phctime into this structure.
But I recall you saying that if we access ring for timestamps only this is not a 
problem :)

> 
> > +};
> > +
> > +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
> > +
> >  /* indices into GLINT_ITR registers */
> >  #define ICE_RX_ITR	ICE_IDX_ITR0
> >  #define ICE_TX_ITR	ICE_IDX_ITR1
> > @@ -301,7 +310,6 @@ enum ice_dynamic_itr {
> >  /* descriptor ring, associated with a VSI */
> >  struct ice_rx_ring {
> >  	/* CL1 - 1st cacheline starts here */
> > -	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
> >  	void *desc;			/* Descriptor ring memory */
> >  	struct device *dev;		/* Used for DMA mapping */
> >  	struct net_device *netdev;	/* netdev ring maps to */
> > @@ -313,12 +321,19 @@ struct ice_rx_ring {
> >  	u16 count;			/* Number of descriptors */
> >  	u16 reg_idx;			/* HW register index of the ring */
> >  	u16 next_to_alloc;
> > -	/* CL2 - 2nd cacheline starts here */
> > +
> >  	union {
> >  		struct ice_rx_buf *rx_buf;
> >  		struct xdp_buff **xdp_buf;
> >  	};
> > -	struct xdp_buff xdp;
> > +	/* CL2 - 2nd cacheline starts here
> > +	 * Size of ice_xdp_buff is 72 bytes,
> > +	 * so it spills into CL3
> > +	 */
> > +	union {
> > +		struct ice_xdp_buff xdp_ext;
> > +		struct xdp_buff xdp;
> > +	};
> 
> ...or you can leave just one xdp_ext (naming it just "xdp") -- for now,
> this union does literally nothing, as xdp_ext contains xdp at its very
> beginning.

I would like to leave non-meta-related-code rather unaware of existance of 
ice_xdp_buff. Why access '&ring->xdp.xdp_buff' or '(struct xdp_buff *)xdp', when 
we can do just 'ring->xdp'?

> 
> >  	/* CL3 - 3rd cacheline starts here */
> >  	struct bpf_prog *xdp_prog;
> >  	u16 rx_offset;
> > @@ -328,6 +343,8 @@ struct ice_rx_ring {
> >  	u16 next_to_clean;
> >  	u16 first_desc;
> >  
> > +	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
> 
> It can be placed even farther, somewhere near rcu_head -- IIRC it's not
> used anywhere on hotpath. Even ::ring_stats below is hotter.

Ok, I'll try to but it further from the start.

> 
> > +
> >  	/* stats structs */
> >  	struct ice_ring_stats *ring_stats;
> >  
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > index e1d49e1235b3..2835a8348237 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > @@ -151,4 +151,15 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
> >  		       struct sk_buff *skb);
> >  void
> >  ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
> > +
> > +static inline void
> > +ice_xdp_set_meta_srcs(struct xdp_buff *xdp,
> 
> Not sure about the naming... But can't propose anything :clownface:
> ice_xdp_init_buff()? Like xdp_init_buff(), but ice_xdp_buff :D

ice_xdp_init_buff() sound exactly like a custom wrapper for xdp_init_buff(), but 
usage of those functions would be quite different. I've contemplated the naming 
of this one for some time and think it's good enough as it is, at least it 
communicates that function has sth to do with 'xdp' and 'meta' and doesn't sound 
like it fills in metadata.
> 
> > +		      union ice_32b_rx_flex_desc *eop_desc,
> > +		      struct ice_rx_ring *rx_ring)
> > +{
> > +	struct ice_xdp_buff *xdp_ext = (struct ice_xdp_buff *)xdp;
> 
> I'd use container_of(), even though it will do the same thing here.
> BTW, is having &xdp_buff at offset 0 still a requirement?

I've actually forgot about why it is a requirement, but have found my older 
github answer to you.

"AF_XDP implementation also assumes xdp_buff is at the start".

What I meant by that is xdp_buffs from xsk_pool have only tailroom.

Maybe I should add a comment about this next to static assert.
Will change to container_of, I guess it's more future-proof.

> 
> > +
> > +	xdp_ext->eop_desc = eop_desc;
> > +	xdp_ext->rx_ring = rx_ring;
> > +}
> >  #endif /* !_ICE_TXRX_LIB_H_ */
> 
> Thanks,
> Olek
Alexander Lobakin May 25, 2023, 11:02 a.m. UTC | #3
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Tue, 23 May 2023 10:02:42 +0200

> On Mon, May 22, 2023 at 06:46:40PM +0200, Alexander Lobakin wrote:
>> From: Larysa Zaremba <larysa.zaremba@intel.com>
>> Date: Fri, 12 May 2023 17:25:57 +0200
>>
>>> In order to use XDP hints via kfuncs we need to put
>>> RX descriptor and ring pointers just next to xdp_buff.
>>> Same as in hints implementations in other drivers, we archieve

                                                          ^^^^^^^^
                                                          achieve

I missed this one initially :D

>>> this through putting xdp_buff into a child structure.
>>>
>>> Currently, xdp_buff is stored in the ring structure,
>>> so replace it with union that includes child structure.
>>> This way enough memory is available while existing XDP code
>>> remains isolated from hints.

[...]

>>> +	/* End of the 1st cache line */
>>> +	struct ice_rx_ring *rx_ring;
>>
>> Can't we get rid of ring dependency? Maybe there's only a couple fields
>> that could be copied here instead of referencing the ring? I just find
>> it weird that our drivers often look for something in the ring structure
>> to parse a descriptor ._.
>> If not, can't it be const?
> 
> You're right, I could put just rx_ring->cached_phctime into this structure.
> But I recall you saying that if we access ring for timestamps only this is not a 
> problem :)

Sure, it's not a problem, I just thought it's an overkill to put pointer
to the ring here, since it's not needed to parse descriptors.
...checked right now, the function which processes timestamp from a
descriptor really needs only ::cached_phctime from the ring, nothing
more. Sorta overkill I think :s This phctime would be enough to put here.

> 
>>
>>> +};
>>> +
>>> +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
>>> +
>>>  /* indices into GLINT_ITR registers */
>>>  #define ICE_RX_ITR	ICE_IDX_ITR0
>>>  #define ICE_TX_ITR	ICE_IDX_ITR1

[...]

>>> +		struct ice_xdp_buff xdp_ext;
>>> +		struct xdp_buff xdp;
>>> +	};
>>
>> ...or you can leave just one xdp_ext (naming it just "xdp") -- for now,
>> this union does literally nothing, as xdp_ext contains xdp at its very
>> beginning.
> 
> I would like to leave non-meta-related-code rather unaware of existance of 
> ice_xdp_buff. Why access '&ring->xdp.xdp_buff' or '(struct xdp_buff *)xdp', when 
> we can do just 'ring->xdp'?

Hmm, got it. On point :D

> 
>>
>>>  	/* CL3 - 3rd cacheline starts here */
>>>  	struct bpf_prog *xdp_prog;
>>>  	u16 rx_offset;

[...]

>>> +static inline void
>>> +ice_xdp_set_meta_srcs(struct xdp_buff *xdp,
>>
>> Not sure about the naming... But can't propose anything :clownface:
>> ice_xdp_init_buff()? Like xdp_init_buff(), but ice_xdp_buff :D
> 
> ice_xdp_init_buff() sound exactly like a custom wrapper for xdp_init_buff(), but 
> usage of those functions would be quite different. I've contemplated the naming 
> of this one for some time and think it's good enough as it is, at least it 
> communicates that function has sth to do with 'xdp' and 'meta' and doesn't sound 
> like it fills in metadata.

ice_xdp_prepare_buff() :D Just kiddin, "set_meta_srcs" is fine, too.

>>
>>> +		      union ice_32b_rx_flex_desc *eop_desc,
>>> +		      struct ice_rx_ring *rx_ring)
>>> +{
>>> +	struct ice_xdp_buff *xdp_ext = (struct ice_xdp_buff *)xdp;
>>
>> I'd use container_of(), even though it will do the same thing here.
>> BTW, is having &xdp_buff at offset 0 still a requirement?
> 
> I've actually forgot about why it is a requirement, but have found my older 
> github answer to you.
> 
> "AF_XDP implementation also assumes xdp_buff is at the start".
> 
> What I meant by that is xdp_buffs from xsk_pool have only tailroom.

> 
> Maybe I should add a comment about this next to static assert.
> Will change to container_of, I guess it's more future-proof.

Ah, AF_XDP programs, right. Comment near the assertion + container_of()
sounds perfect.

[...]

Thanks,
Olek
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index c9bb77da0861..ca21a71749b6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -557,13 +557,14 @@  ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
  * @xdp_prog: XDP program to run
  * @xdp_ring: ring to be used for XDP_TX action
  * @rx_buf: Rx buffer to store the XDP action
+ * @eop_desc: Last descriptor in packet to read metadata from
  *
  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
  */
 static void
 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
-	    struct ice_rx_buf *rx_buf)
+	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
 {
 	unsigned int ret = ICE_XDP_PASS;
 	u32 act;
@@ -571,6 +572,8 @@  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	if (!xdp_prog)
 		goto exit;
 
+	ice_xdp_set_meta_srcs(xdp, eop_desc, rx_ring);
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
@@ -1240,7 +1243,7 @@  int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		if (ice_is_non_eop(rx_ring, rx_desc))
 			continue;
 
-		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
+		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
 		if (rx_buf->act == ICE_XDP_PASS)
 			goto construct_skb;
 		total_rx_bytes += xdp_get_buff_len(xdp);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index fff0efe28373..f1ac2eb974f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -260,6 +260,15 @@  enum ice_rx_dtype {
 	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
 };
 
+struct ice_xdp_buff {
+	struct xdp_buff xdp_buff;
+	union ice_32b_rx_flex_desc *eop_desc;	/* Required for all metadata */
+	/* End of the 1st cache line */
+	struct ice_rx_ring *rx_ring;
+};
+
+static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
+
 /* indices into GLINT_ITR registers */
 #define ICE_RX_ITR	ICE_IDX_ITR0
 #define ICE_TX_ITR	ICE_IDX_ITR1
@@ -301,7 +310,6 @@  enum ice_dynamic_itr {
 /* descriptor ring, associated with a VSI */
 struct ice_rx_ring {
 	/* CL1 - 1st cacheline starts here */
-	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	struct net_device *netdev;	/* netdev ring maps to */
@@ -313,12 +321,19 @@  struct ice_rx_ring {
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
 	u16 next_to_alloc;
-	/* CL2 - 2nd cacheline starts here */
+
 	union {
 		struct ice_rx_buf *rx_buf;
 		struct xdp_buff **xdp_buf;
 	};
-	struct xdp_buff xdp;
+	/* CL2 - 2nd cacheline starts here
+	 * Size of ice_xdp_buff is 72 bytes,
+	 * so it spills into CL3
+	 */
+	union {
+		struct ice_xdp_buff xdp_ext;
+		struct xdp_buff xdp;
+	};
 	/* CL3 - 3rd cacheline starts here */
 	struct bpf_prog *xdp_prog;
 	u16 rx_offset;
@@ -328,6 +343,8 @@  struct ice_rx_ring {
 	u16 next_to_clean;
 	u16 first_desc;
 
+	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
+
 	/* stats structs */
 	struct ice_ring_stats *ring_stats;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index e1d49e1235b3..2835a8348237 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -151,4 +151,15 @@  ice_process_skb_fields(struct ice_rx_ring *rx_ring,
 		       struct sk_buff *skb);
 void
 ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+
+static inline void
+ice_xdp_set_meta_srcs(struct xdp_buff *xdp,
+		      union ice_32b_rx_flex_desc *eop_desc,
+		      struct ice_rx_ring *rx_ring)
+{
+	struct ice_xdp_buff *xdp_ext = (struct ice_xdp_buff *)xdp;
+
+	xdp_ext->eop_desc = eop_desc;
+	xdp_ext->rx_ring = rx_ring;
+}
 #endif /* !_ICE_TXRX_LIB_H_ */