@@ -490,6 +490,14 @@ enum ice_pf_flags {
ICE_PF_FLAGS_NBITS /* must be last */
};
+enum { /* XDP metadata format IDs; stored in xdp_info.drv_cookie and used to index ice_supported_md[] */
+ ICE_MD_GENERIC, /* ice_supported_md[] maps this to "struct xdp_meta_generic" */
+
+ /* Must be last */
+ ICE_MD_NONE, /* stored in drv_cookie by ice_xdp_setup_prog() before it writes the matched format */
+ __ICE_MD_NUM, /* number of IDs above; array size of ice_supported_md[] */
+};
+
struct ice_switchdev_info {
struct ice_vsi *control_vsi;
struct ice_vsi *uplink_vsi;
@@ -48,6 +48,11 @@ static DEFINE_IDA(ice_aux_ida);
DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
EXPORT_SYMBOL(ice_xdp_locking_key);
+/* List of XDP metadata formats supported by the driver, indexed by ICE_MD_*.
+ * ice_xdp_setup_prog() hands it to xdp_meta_match_id() together with the BTF
+ * ID of the program being attached. Only ICE_MD_GENERIC has a name here, so
+ * the remaining slot (ICE_MD_NONE) is implicitly NULL.
+ */
+static const char * const ice_supported_md[__ICE_MD_NUM] = {
+ [ICE_MD_GENERIC] = "struct xdp_meta_generic",
+};
+
/**
* ice_hw_to_dev - Get device pointer from the hardware structure
* @hw: pointer to the device HW structure
@@ -2848,13 +2853,19 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct netdev_bpf *xdp)
int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
struct netlink_ext_ack *extack = xdp->extack;
bool restart = false, prog = !!xdp->prog;
- int ret = 0, xdp_ring_err = 0;
+ int pos, ret = 0, xdp_ring_err = 0;
if (frame_size > vsi->rx_buf_len) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
return -EOPNOTSUPP;
}
+ pos = xdp_meta_match_id(ice_supported_md, xdp->btf_id);
+ if (pos < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid or unsupported BTF ID");
+ return pos;
+ }
+
/* need to stop netdev while setting up the program for Rx rings */
if (ice_is_xdp_ena_vsi(vsi) != prog && netif_running(vsi->netdev) &&
!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
@@ -2867,6 +2878,9 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct netdev_bpf *xdp)
restart = true;
}
+ /* Paired with the READ_ONCE()'s in ice_clean_rx_irq{,_zc}() */
+ WRITE_ONCE(vsi->xdp_info.drv_cookie, ICE_MD_NONE);
+
if (!ice_is_xdp_ena_vsi(vsi) && prog) {
xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
if (xdp_ring_err) {
@@ -2889,6 +2903,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct netdev_bpf *xdp)
xdp_attachment_setup_rcu(&vsi->xdp_info, xdp);
}
+ WRITE_ONCE(vsi->xdp_info.drv_cookie, pos);
+
if (restart)
ret = ice_up(vsi);
@@ -1103,10 +1103,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
unsigned int offset = rx_ring->rx_offset;
+ struct xdp_attachment_info xdp_info;
struct ice_tx_ring *xdp_ring = NULL;
unsigned int xdp_res, xdp_xmit = 0;
struct sk_buff *skb = rx_ring->skb;
- struct bpf_prog *xdp_prog = NULL;
struct xdp_buff xdp;
bool failure;
@@ -1116,9 +1116,16 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
#endif
xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
- xdp_prog = rcu_dereference(rx_ring->xdp_info->prog_rcu);
- if (xdp_prog)
+ xdp_info.prog = rcu_dereference(rx_ring->xdp_info->prog_rcu);
+ if (xdp_info.prog) {
+ const struct xdp_attachment_info *info = rx_ring->xdp_info;
+
+ xdp_info.btf_id_le = cpu_to_le64(READ_ONCE(info->btf_id));
+ xdp_info.meta_thresh = READ_ONCE(info->meta_thresh);
+ xdp_info.drv_cookie = READ_ONCE(info->drv_cookie);
+
xdp_ring = rx_ring->xdp_ring;
+ }
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
@@ -1182,10 +1189,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif
- if (!xdp_prog)
+ if (!xdp_info.prog)
goto construct_skb;
- xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring);
+ ice_xdp_handle_meta(&xdp, &md, &xdp_info, rx_desc, rx_ring);
+
+ xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_info.prog, xdp_ring);
if (!xdp_res)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
@@ -1240,8 +1249,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
- ice_xdp_build_meta(&md, rx_desc, rx_ring, 0);
- __xdp_populate_skb_meta_generic(skb, &md);
+ ice_xdp_meta_populate_skb(skb, &md, xdp.data, rx_desc,
+ rx_ring);
ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
ice_receive_skb(rx_ring, skb);
@@ -1254,7 +1263,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
- if (xdp_prog)
+ if (xdp_info.prog)
ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
rx_ring->skb = skb;
@@ -98,7 +98,60 @@ void __ice_xdp_build_meta(struct xdp_meta_generic_rx *rx_md,
const struct ice_rx_ring *rx_ring,
__le64 full_id);
+/**
+ * __ice_xdp_handle_meta - prepare XDP metadata before running the program
+ * @xdp: buffer that is about to be handed to the XDP program
+ * @rx_md: on-stack generic Rx metadata; doubles as a "was built" marker
+ * @info: XDP attachment info (size threshold, format cookie, BTF ID)
+ * @rx_desc: Rx descriptor the metadata is built from
+ * @rx_ring: Rx ring the frame was received on
+ *
+ * Always clears @rx_md->rx_flags. When the frame is at least
+ * @info->meta_thresh bytes long and @info->drv_cookie is %ICE_MD_GENERIC,
+ * builds the generic Rx metadata in @rx_md, copies it to the location
+ * returned by xdp_meta_generic_ptr() for @xdp->data, points @xdp->data_meta
+ * there and zeroes the Tx flags of that copy. Any other cookie value leaves
+ * the buffer untouched.
+ */
+static inline void
+__ice_xdp_handle_meta(struct xdp_buff *xdp, struct xdp_meta_generic_rx *rx_md,
+		      const struct xdp_attachment_info *info,
+		      const union ice_32b_rx_flex_desc *rx_desc,
+		      const struct ice_rx_ring *rx_ring)
+{
+	/* Zero flags tell the skb path that nothing has been built yet */
+	rx_md->rx_flags = 0;
+
+	/* Frames shorter than the threshold go to the program without meta */
+	if (xdp->data_end - xdp->data < info->meta_thresh)
+		return;
+
+	/* The generic format is the only one the driver builds here */
+	if (info->drv_cookie != ICE_MD_GENERIC)
+		return;
+
+	__ice_xdp_build_meta(rx_md, rx_desc, rx_ring, info->btf_id_le);
+
+	/* Sync the on-stack copy with the area exposed to the program */
+	xdp->data_meta = xdp_meta_generic_ptr(xdp->data);
+	memcpy(to_rx_md(xdp->data_meta), rx_md, sizeof(*rx_md));
+
+	/* Clearing the Tx flags alone is enough, no need to zero all of Tx */
+	to_gen_md(xdp->data_meta)->tx_flags = 0;
+}
+
+/**
+ * __ice_xdp_meta_populate_skb - fill skb fields from generic Rx metadata
+ * @skb: skb being prepared for the stack
+ * @rx_md: on-stack generic Rx metadata for this frame
+ * @data: start of the frame data, used to locate the in-buffer metadata
+ * @rx_desc: Rx descriptor of the frame
+ * @rx_ring: Rx ring the frame was received on
+ *
+ * __ice_xdp_build_meta() unconditionally sets the Rx queue id, so zero
+ * @rx_md->rx_flags mean the metadata has not been built for this frame.
+ * In that case, or when the area in front of @data no longer holds generic
+ * metadata, it is built from the descriptor right here. Otherwise the
+ * on-stack copy is refreshed from the buffer and the skb metadata is marked
+ * as nocomp so that the GRO layer ignores it.
+ */
+static inline void
+__ice_xdp_meta_populate_skb(struct sk_buff *skb,
+			    struct xdp_meta_generic_rx *rx_md,
+			    const void *data,
+			    const union ice_32b_rx_flex_desc *rx_desc,
+			    const struct ice_rx_ring *rx_ring)
+{
+	if (!rx_md->rx_flags || unlikely(!xdp_meta_has_generic(data))) {
+		/* Nothing usable in front of the frame: build it now */
+		__ice_xdp_build_meta(rx_md, rx_desc, rx_ring, 0);
+	} else {
+		/* Pick up whatever is stored in the buffer at this point */
+		memcpy(rx_md, to_rx_md(xdp_meta_generic_ptr(data)),
+		       sizeof(*rx_md));
+		skb_metadata_nocomp_set(skb);
+	}
+
+	__xdp_populate_skb_meta_generic(skb, rx_md);
+}
+
#define ice_xdp_build_meta(md, ...) \
__ice_xdp_build_meta(to_rx_md(md), ##__VA_ARGS__) /* @md is converted with to_rx_md() first */
+#define ice_xdp_handle_meta(xdp, md, ...) \
+ __ice_xdp_handle_meta((xdp), to_rx_md(md), ##__VA_ARGS__) /* same @md conversion; called before the XDP program runs */
+#define ice_xdp_meta_populate_skb(skb, md, ...) \
+ __ice_xdp_meta_populate_skb((skb), to_rx_md(md), ##__VA_ARGS__) /* same @md conversion; called before ice_receive_skb() */
#endif /* !_ICE_TXRX_LIB_H_ */
@@ -588,16 +588,20 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ const struct xdp_attachment_info *rxi = rx_ring->xdp_info, xdp_info = {
+ .prog = rcu_dereference(rxi->prog_rcu),
+ .btf_id_le = cpu_to_le64(READ_ONCE(rxi->btf_id)),
+ .meta_thresh = READ_ONCE(rxi->meta_thresh),
+ .drv_cookie = READ_ONCE(rxi->drv_cookie),
+ };
struct ice_tx_ring *xdp_ring;
unsigned int xdp_xmit = 0;
- struct bpf_prog *xdp_prog;
bool failure = false;
int entries_to_alloc;
/* ZC patch is enabled only when XDP program is set,
* so here it can not be NULL
*/
- xdp_prog = rcu_dereference(rx_ring->xdp_info->prog_rcu);
xdp_ring = rx_ring->xdp_ring;
while (likely(total_rx_packets < (unsigned int)budget)) {
@@ -638,7 +642,10 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
xsk_buff_set_size(xdp, size);
xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
- xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
+ ice_xdp_handle_meta(xdp, &md, &xdp_info, rx_desc, rx_ring);
+
+ xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_info.prog,
+ xdp_ring);
if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
xdp_xmit |= xdp_res;
} else if (xdp_res == ICE_XDP_EXIT) {
@@ -674,8 +681,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
total_rx_bytes += skb->len;
total_rx_packets++;
- ice_xdp_build_meta(&md, rx_desc, rx_ring, 0);
- __xdp_populate_skb_meta_generic(skb, &md);
+ ice_xdp_meta_populate_skb(skb, &md, xdp->data, rx_desc,
+ rx_ring);
ice_receive_skb(rx_ring, skb);
}
Now that the driver builds skbs from an on-stack generic meta structure, add the ability to configure the actual metadata format to be provided to BPF and XSK programs (and other consumers like cpumap). The metadata is first built on the stack and then synchronized with the buffer in front of a frame; the reverse is done after the program returns to the driver. In cases when meta is disabled or the frame size is below the threshold, the driver populates it only on %XDP_PASS and right before populating an skb, so there is no performance hit in those cases. Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com> --- drivers/net/ethernet/intel/ice/ice.h | 8 +++ drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++- drivers/net/ethernet/intel/ice/ice_txrx.c | 25 ++++++--- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 53 +++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_xsk.c | 17 ++++-- 5 files changed, 107 insertions(+), 14 deletions(-)