diff mbox series

[v2,5/9] virtio_net: construct multi-buffer xdp in mergeable

Message ID 20221220141449.115918-6-hengqi@linux.alibaba.com (mailing list archive)
State Deferred
Delegated to: Netdev Maintainers
Headers show
Series virtio_net: support multi buffer xdp | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: virtualization@lists.linux-foundation.org hawk@kernel.org
netdev/build_clang success Errors and warnings before: 2 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1 this patch: 0
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Heng Qi Dec. 20, 2022, 2:14 p.m. UTC
Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().

For the prefilled buffer before xdp is set, we will probably use
vq reset in the future. At the same time, virtio net currently
uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
the tailroom of the last frag, which will involve the offset of the
corresponding page and cause a negative value, so we disable tail
increase by not setting xdp_rxq->frag_size.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 60 +++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 16 deletions(-)

Comments

Jason Wang Dec. 27, 2022, 7:01 a.m. UTC | #1
在 2022/12/20 22:14, Heng Qi 写道:
> Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
>
> For the prefilled buffer before xdp is set, we will probably use
> vq reset in the future. At the same time, virtio net currently
> uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
> the tailroom of the last frag, which will involve the offset of the
> corresponding page and cause a negative value, so we disable tail
> increase by not setting xdp_rxq->frag_size.
>
> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>   drivers/net/virtio_net.c | 60 +++++++++++++++++++++++++++++-----------
>   1 file changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 8fc3b1841d92..40bc58fa57f5 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1018,6 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   					 unsigned int *xdp_xmit,
>   					 struct virtnet_rq_stats *stats)
>   {
> +	unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>   	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
>   	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>   	struct page *page = virt_to_head_page(buf);
> @@ -1048,11 +1049,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   	rcu_read_lock();
>   	xdp_prog = rcu_dereference(rq->xdp_prog);
>   	if (xdp_prog) {
> +		unsigned int xdp_frags_truesz = 0;
> +		struct skb_shared_info *shinfo;
>   		struct xdp_frame *xdpf;
>   		struct page *xdp_page;
>   		struct xdp_buff xdp;
>   		void *data;
>   		u32 act;
> +		int i;
>   
>   		/* Transient failure which in theory could occur if
>   		 * in-flight packets from before XDP was enabled reach
> @@ -1061,19 +1065,23 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   		if (unlikely(hdr->hdr.gso_type))
>   			goto err_xdp;
>   
> -		/* Buffers with headroom use PAGE_SIZE as alloc size,
> -		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> +		/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
> +		 * with headroom may add hole in truesize, which
> +		 * make their length exceed PAGE_SIZE. So we disabled the
> +		 * hole mechanism for xdp. See add_recvbuf_mergeable().
>   		 */
>   		frame_sz = headroom ? PAGE_SIZE : truesize;
>   
> -		/* This happens when rx buffer size is underestimated
> -		 * or headroom is not enough because of the buffer
> -		 * was refilled before XDP is set. This should only
> -		 * happen for the first several packets, so we don't
> -		 * care much about its performance.
> +		/* This happens when headroom is not enough because
> +		 * of the buffer was prefilled before XDP is set.
> +		 * This should only happen for the first several packets.
> +		 * In fact, vq reset can be used here to help us clean up
> +		 * the prefilled buffers, but many existing devices do not
> +		 * support it, and we don't want to bother users who are
> +		 * using xdp normally.
>   		 */
> -		if (unlikely(num_buf > 1 ||
> -			     headroom < virtnet_get_headroom(vi))) {
> +		if (!xdp_prog->aux->xdp_has_frags &&
> +		    (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
>   			/* linearize data for XDP */
>   			xdp_page = xdp_linearize_page(rq, &num_buf,
>   						      page, offset,
> @@ -1084,17 +1092,26 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   			if (!xdp_page)
>   				goto err_xdp;
>   			offset = VIRTIO_XDP_HEADROOM;
> +		} else if (unlikely(headroom < virtnet_get_headroom(vi))) {


I believe we need to check xdp_prog->aux->xdp_has_frags at least since 
this may not work if it needs more than one frag?

Btw, I don't see a reason why we can't reuse xdp_linearize_page() (we 
probably don't need an error if the buffer exceeds PAGE_SIZE).

Other looks good.

Thanks


> +			if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
> +				goto err_xdp;
> +
> +			xdp_page = alloc_page(GFP_ATOMIC);
> +			if (!xdp_page)
> +				goto err_xdp;
> +
> +			memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> +			       page_address(page) + offset, len);
> +			frame_sz = PAGE_SIZE;
> +			offset = VIRTIO_XDP_HEADROOM;
>   		} else {
>   			xdp_page = page;
>   		}
> -
> -		/* Allow consuming headroom but reserve enough space to push
> -		 * the descriptor on if we get an XDP_TX return code.
> -		 */
>   		data = page_address(xdp_page) + offset;
> -		xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> -		xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
> -				 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> +		err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
> +						 &num_buf, &xdp_frags_truesz, stats);
> +		if (unlikely(err))
> +			goto err_xdp_frags;
>   
>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>   		stats->xdp_packets++;
> @@ -1190,6 +1207,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   				__free_pages(xdp_page, 0);
>   			goto err_xdp;
>   		}
> +err_xdp_frags:
> +		shinfo = xdp_get_shared_info_from_buff(&xdp);
> +
> +		if (unlikely(xdp_page != page))
> +			__free_pages(xdp_page, 0);
> +
> +		for (i = 0; i < shinfo->nr_frags; i++) {
> +			xdp_page = skb_frag_page(&shinfo->frags[i]);
> +			put_page(xdp_page);
> +		}
> +		goto err_xdp;
>   	}
>   	rcu_read_unlock();
>
Heng Qi Dec. 27, 2022, 9:31 a.m. UTC | #2
在 2022/12/27 下午3:01, Jason Wang 写道:
>
> 在 2022/12/20 22:14, Heng Qi 写道:
>> Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
>>
>> For the prefilled buffer before xdp is set, we will probably use
>> vq reset in the future. At the same time, virtio net currently
>> uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
>> the tailroom of the last frag, which will involve the offset of the
>> corresponding page and cause a negative value, so we disable tail
>> increase by not setting xdp_rxq->frag_size.
>>
>> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
>> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>> ---
>>   drivers/net/virtio_net.c | 60 +++++++++++++++++++++++++++++-----------
>>   1 file changed, 44 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 8fc3b1841d92..40bc58fa57f5 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -1018,6 +1018,7 @@ static struct sk_buff *receive_mergeable(struct 
>> net_device *dev,
>>                        unsigned int *xdp_xmit,
>>                        struct virtnet_rq_stats *stats)
>>   {
>> +    unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct 
>> skb_shared_info));
>>       struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
>>       u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>>       struct page *page = virt_to_head_page(buf);
>> @@ -1048,11 +1049,14 @@ static struct sk_buff 
>> *receive_mergeable(struct net_device *dev,
>>       rcu_read_lock();
>>       xdp_prog = rcu_dereference(rq->xdp_prog);
>>       if (xdp_prog) {
>> +        unsigned int xdp_frags_truesz = 0;
>> +        struct skb_shared_info *shinfo;
>>           struct xdp_frame *xdpf;
>>           struct page *xdp_page;
>>           struct xdp_buff xdp;
>>           void *data;
>>           u32 act;
>> +        int i;
>>             /* Transient failure which in theory could occur if
>>            * in-flight packets from before XDP was enabled reach
>> @@ -1061,19 +1065,23 @@ static struct sk_buff 
>> *receive_mergeable(struct net_device *dev,
>>           if (unlikely(hdr->hdr.gso_type))
>>               goto err_xdp;
>>   -        /* Buffers with headroom use PAGE_SIZE as alloc size,
>> -         * see add_recvbuf_mergeable() + get_mergeable_buf_len()
>> +        /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
>> +         * with headroom may add hole in truesize, which
>> +         * make their length exceed PAGE_SIZE. So we disabled the
>> +         * hole mechanism for xdp. See add_recvbuf_mergeable().
>>            */
>>           frame_sz = headroom ? PAGE_SIZE : truesize;
>>   -        /* This happens when rx buffer size is underestimated
>> -         * or headroom is not enough because of the buffer
>> -         * was refilled before XDP is set. This should only
>> -         * happen for the first several packets, so we don't
>> -         * care much about its performance.
>> +        /* This happens when headroom is not enough because
>> +         * of the buffer was prefilled before XDP is set.
>> +         * This should only happen for the first several packets.
>> +         * In fact, vq reset can be used here to help us clean up
>> +         * the prefilled buffers, but many existing devices do not
>> +         * support it, and we don't want to bother users who are
>> +         * using xdp normally.
>>            */
>> -        if (unlikely(num_buf > 1 ||
>> -                 headroom < virtnet_get_headroom(vi))) {
>> +        if (!xdp_prog->aux->xdp_has_frags &&
>> +            (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
>>               /* linearize data for XDP */
>>               xdp_page = xdp_linearize_page(rq, &num_buf,
>>                                 page, offset,
>> @@ -1084,17 +1092,26 @@ static struct sk_buff 
>> *receive_mergeable(struct net_device *dev,
>>               if (!xdp_page)
>>                   goto err_xdp;
>>               offset = VIRTIO_XDP_HEADROOM;
>> +        } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
>
>
> I believe we need to check xdp_prog->aux->xdp_has_frags at least since 
> this may not work if it needs more than one frags?

Sorry Jason, I didn't understand you, I'll try to answer. For 
multi-buffer xdp programs, if the first buffer is a pre-filled buffer 
(no headroom),
we need to copy it out and use the subsequent buffers of this packet as 
its frags (this is done in virtnet_build_xdp_buff_mrg()), therefore,
it seems that there is no need to check 'xdp_prog->aux->xdp_has_frags' 
to mark multi-buffer xdp (of course I can add it),

+ } else if (unlikely(headroom < virtnet_get_headroom(vi))) {

Because the linearization of single-buffer xdp has all been done before, 
the subsequent situation can only be applied to multi-buffer xdp:
+ if (!xdp_prog->aux->xdp_has_frags &&
+ (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {

>
> Btw, I don't see a reason why we can't reuse xdp_linearize_page(), (we 
> probably don't need error is the buffer exceeds PAGE_SIZE).

For multi-buffer xdp, we only need to copy out the pre-filled first 
buffer, and use the remaining buffers of this packet as frags in 
virtnet_build_xdp_buff_mrg().

Thanks.

>
> Other looks good.
>
> Thanks
>
>
>> +            if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
>> +                goto err_xdp;
>> +
>> +            xdp_page = alloc_page(GFP_ATOMIC);
>> +            if (!xdp_page)
>> +                goto err_xdp;
>> +
>> +            memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
>> +                   page_address(page) + offset, len);
>> +            frame_sz = PAGE_SIZE;
>> +            offset = VIRTIO_XDP_HEADROOM;
>>           } else {
>>               xdp_page = page;
>>           }
>> -
>> -        /* Allow consuming headroom but reserve enough space to push
>> -         * the descriptor on if we get an XDP_TX return code.
>> -         */
>>           data = page_address(xdp_page) + offset;
>> -        xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
>> -        xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + 
>> vi->hdr_len,
>> -                 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
>> +        err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, 
>> len, frame_sz,
>> +                         &num_buf, &xdp_frags_truesz, stats);
>> +        if (unlikely(err))
>> +            goto err_xdp_frags;
>>             act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>           stats->xdp_packets++;
>> @@ -1190,6 +1207,17 @@ static struct sk_buff 
>> *receive_mergeable(struct net_device *dev,
>>                   __free_pages(xdp_page, 0);
>>               goto err_xdp;
>>           }
>> +err_xdp_frags:
>> +        shinfo = xdp_get_shared_info_from_buff(&xdp);
>> +
>> +        if (unlikely(xdp_page != page))
>> +            __free_pages(xdp_page, 0);
>> +
>> +        for (i = 0; i < shinfo->nr_frags; i++) {
>> +            xdp_page = skb_frag_page(&shinfo->frags[i]);
>> +            put_page(xdp_page);
>> +        }
>> +        goto err_xdp;
>>       }
>>       rcu_read_unlock();
Jason Wang Dec. 28, 2022, 6:24 a.m. UTC | #3
在 2022/12/27 17:31, Heng Qi 写道:
>
>
> 在 2022/12/27 下午3:01, Jason Wang 写道:
>>
>> 在 2022/12/20 22:14, Heng Qi 写道:
>>> Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
>>>
>>> For the prefilled buffer before xdp is set, we will probably use
>>> vq reset in the future. At the same time, virtio net currently
>>> uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
>>> the tailroom of the last frag, which will involve the offset of the
>>> corresponding page and cause a negative value, so we disable tail
>>> increase by not setting xdp_rxq->frag_size.
>>>
>>> Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
>>> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>>> ---
>>>   drivers/net/virtio_net.c | 60 
>>> +++++++++++++++++++++++++++++-----------
>>>   1 file changed, 44 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>> index 8fc3b1841d92..40bc58fa57f5 100644
>>> --- a/drivers/net/virtio_net.c
>>> +++ b/drivers/net/virtio_net.c
>>> @@ -1018,6 +1018,7 @@ static struct sk_buff 
>>> *receive_mergeable(struct net_device *dev,
>>>                        unsigned int *xdp_xmit,
>>>                        struct virtnet_rq_stats *stats)
>>>   {
>>> +    unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct 
>>> skb_shared_info));
>>>       struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
>>>       u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>>>       struct page *page = virt_to_head_page(buf);
>>> @@ -1048,11 +1049,14 @@ static struct sk_buff 
>>> *receive_mergeable(struct net_device *dev,
>>>       rcu_read_lock();
>>>       xdp_prog = rcu_dereference(rq->xdp_prog);
>>>       if (xdp_prog) {
>>> +        unsigned int xdp_frags_truesz = 0;
>>> +        struct skb_shared_info *shinfo;
>>>           struct xdp_frame *xdpf;
>>>           struct page *xdp_page;
>>>           struct xdp_buff xdp;
>>>           void *data;
>>>           u32 act;
>>> +        int i;
>>>             /* Transient failure which in theory could occur if
>>>            * in-flight packets from before XDP was enabled reach
>>> @@ -1061,19 +1065,23 @@ static struct sk_buff 
>>> *receive_mergeable(struct net_device *dev,
>>>           if (unlikely(hdr->hdr.gso_type))
>>>               goto err_xdp;
>>>   -        /* Buffers with headroom use PAGE_SIZE as alloc size,
>>> -         * see add_recvbuf_mergeable() + get_mergeable_buf_len()
>>> +        /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
>>> +         * with headroom may add hole in truesize, which
>>> +         * make their length exceed PAGE_SIZE. So we disabled the
>>> +         * hole mechanism for xdp. See add_recvbuf_mergeable().
>>>            */
>>>           frame_sz = headroom ? PAGE_SIZE : truesize;
>>>   -        /* This happens when rx buffer size is underestimated
>>> -         * or headroom is not enough because of the buffer
>>> -         * was refilled before XDP is set. This should only
>>> -         * happen for the first several packets, so we don't
>>> -         * care much about its performance.
>>> +        /* This happens when headroom is not enough because
>>> +         * of the buffer was prefilled before XDP is set.
>>> +         * This should only happen for the first several packets.
>>> +         * In fact, vq reset can be used here to help us clean up
>>> +         * the prefilled buffers, but many existing devices do not
>>> +         * support it, and we don't want to bother users who are
>>> +         * using xdp normally.
>>>            */
>>> -        if (unlikely(num_buf > 1 ||
>>> -                 headroom < virtnet_get_headroom(vi))) {
>>> +        if (!xdp_prog->aux->xdp_has_frags &&
>>> +            (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
>>>               /* linearize data for XDP */
>>>               xdp_page = xdp_linearize_page(rq, &num_buf,
>>>                                 page, offset,
>>> @@ -1084,17 +1092,26 @@ static struct sk_buff 
>>> *receive_mergeable(struct net_device *dev,
>>>               if (!xdp_page)
>>>                   goto err_xdp;
>>>               offset = VIRTIO_XDP_HEADROOM;
>>> +        } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
>>
>>
>> I believe we need to check xdp_prog->aux->xdp_has_frags at least 
>> since this may not work if it needs more than one frags?
>
> Sorry Jason, I didn't understand you, I'll try to answer. For 
> multi-buffer xdp programs, if the first buffer is a pre-filled buffer 
> (no headroom),
> we need to copy it out and use the subsequent buffers of this packet 
> as its frags (this is done in virtnet_build_xdp_buff_mrg()), therefore,
> it seems that there is no need to check 'xdp_prog->aux->xdp_has_frags' 
> to mark multi-buffer xdp (of course I can add it),
>
> + } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
>
> Because the linearization of single-buffer xdp has all been done 
> before, the subsequent situation can only be applied to multi-buffer xdp:
> + if (!xdp_prog->aux->xdp_has_frags &&
> + (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {


I basically meant what happens if

!xdp_prog->aux->xdp_has_frags && num_buf > 2 && headroom < 
virtnet_get_headroom(vi)

In this case the current code seems to leave the second buffer in the 
frags. This is the case of the buffer size underestimation that is 
mentioned in the comment before (I'd like to keep that).

(And that's why I'm asking to use xdp_linearize_page())

Thanks


>
>>
>> Btw, I don't see a reason why we can't reuse xdp_linearize_page(), 
>> (we probably don't need error is the buffer exceeds PAGE_SIZE).
>
> For multi-buffer xdp, we only need to copy out the pre-filled first 
> buffer, and use the remaining buffers of this packet as frags in 
> virtnet_build_xdp_buff_mrg().
>
> Thanks.
>
>>
>> Other looks good.
>>
>> Thanks
>>
>>
>>> +            if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
>>> +                goto err_xdp;
>>> +
>>> +            xdp_page = alloc_page(GFP_ATOMIC);
>>> +            if (!xdp_page)
>>> +                goto err_xdp;
>>> +
>>> +            memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
>>> +                   page_address(page) + offset, len);
>>> +            frame_sz = PAGE_SIZE;
>>> +            offset = VIRTIO_XDP_HEADROOM;
>>>           } else {
>>>               xdp_page = page;
>>>           }
>>> -
>>> -        /* Allow consuming headroom but reserve enough space to push
>>> -         * the descriptor on if we get an XDP_TX return code.
>>> -         */
>>>           data = page_address(xdp_page) + offset;
>>> -        xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
>>> -        xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + 
>>> vi->hdr_len,
>>> -                 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
>>> +        err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, 
>>> len, frame_sz,
>>> +                         &num_buf, &xdp_frags_truesz, stats);
>>> +        if (unlikely(err))
>>> +            goto err_xdp_frags;
>>>             act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>>           stats->xdp_packets++;
>>> @@ -1190,6 +1207,17 @@ static struct sk_buff 
>>> *receive_mergeable(struct net_device *dev,
>>>                   __free_pages(xdp_page, 0);
>>>               goto err_xdp;
>>>           }
>>> +err_xdp_frags:
>>> +        shinfo = xdp_get_shared_info_from_buff(&xdp);
>>> +
>>> +        if (unlikely(xdp_page != page))
>>> +            __free_pages(xdp_page, 0);
>>> +
>>> +        for (i = 0; i < shinfo->nr_frags; i++) {
>>> +            xdp_page = skb_frag_page(&shinfo->frags[i]);
>>> +            put_page(xdp_page);
>>> +        }
>>> +        goto err_xdp;
>>>       }
>>>       rcu_read_unlock();
>
Heng Qi Dec. 28, 2022, 8:23 a.m. UTC | #4
On Wed, Dec 28, 2022 at 02:24:22PM +0800, Jason Wang wrote:
> 
> 在 2022/12/27 17:31, Heng Qi 写道:
> >
> >
> >在 2022/12/27 下午3:01, Jason Wang 写道:
> >>
> >>在 2022/12/20 22:14, Heng Qi 写道:
> >>>Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
> >>>
> >>>For the prefilled buffer before xdp is set, we will probably use
> >>>vq reset in the future. At the same time, virtio net currently
> >>>uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
> >>>the tailroom of the last frag, which will involve the offset of the
> >>>corresponding page and cause a negative value, so we disable tail
> >>>increase by not setting xdp_rxq->frag_size.
> >>>
> >>>Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> >>>Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> >>>---
> >>>  drivers/net/virtio_net.c | 60
> >>>+++++++++++++++++++++++++++++-----------
> >>>  1 file changed, 44 insertions(+), 16 deletions(-)
> >>>
> >>>diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >>>index 8fc3b1841d92..40bc58fa57f5 100644
> >>>--- a/drivers/net/virtio_net.c
> >>>+++ b/drivers/net/virtio_net.c
> >>>@@ -1018,6 +1018,7 @@ static struct sk_buff
> >>>*receive_mergeable(struct net_device *dev,
> >>>                       unsigned int *xdp_xmit,
> >>>                       struct virtnet_rq_stats *stats)
> >>>  {
> >>>+    unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct
> >>>skb_shared_info));
> >>>      struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
> >>>      u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> >>>      struct page *page = virt_to_head_page(buf);
> >>>@@ -1048,11 +1049,14 @@ static struct sk_buff
> >>>*receive_mergeable(struct net_device *dev,
> >>>      rcu_read_lock();
> >>>      xdp_prog = rcu_dereference(rq->xdp_prog);
> >>>      if (xdp_prog) {
> >>>+        unsigned int xdp_frags_truesz = 0;
> >>>+        struct skb_shared_info *shinfo;
> >>>          struct xdp_frame *xdpf;
> >>>          struct page *xdp_page;
> >>>          struct xdp_buff xdp;
> >>>          void *data;
> >>>          u32 act;
> >>>+        int i;
> >>>            /* Transient failure which in theory could occur if
> >>>           * in-flight packets from before XDP was enabled reach
> >>>@@ -1061,19 +1065,23 @@ static struct sk_buff
> >>>*receive_mergeable(struct net_device *dev,
> >>>          if (unlikely(hdr->hdr.gso_type))
> >>>              goto err_xdp;
> >>>  -        /* Buffers with headroom use PAGE_SIZE as alloc size,
> >>>-         * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> >>>+        /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
> >>>+         * with headroom may add hole in truesize, which
> >>>+         * make their length exceed PAGE_SIZE. So we disabled the
> >>>+         * hole mechanism for xdp. See add_recvbuf_mergeable().
> >>>           */
> >>>          frame_sz = headroom ? PAGE_SIZE : truesize;
> >>>  -        /* This happens when rx buffer size is underestimated
> >>>-         * or headroom is not enough because of the buffer
> >>>-         * was refilled before XDP is set. This should only
> >>>-         * happen for the first several packets, so we don't
> >>>-         * care much about its performance.
> >>>+        /* This happens when headroom is not enough because
> >>>+         * of the buffer was prefilled before XDP is set.
> >>>+         * This should only happen for the first several packets.
> >>>+         * In fact, vq reset can be used here to help us clean up
> >>>+         * the prefilled buffers, but many existing devices do not
> >>>+         * support it, and we don't want to bother users who are
> >>>+         * using xdp normally.
> >>>           */
> >>>-        if (unlikely(num_buf > 1 ||
> >>>-                 headroom < virtnet_get_headroom(vi))) {
> >>>+        if (!xdp_prog->aux->xdp_has_frags &&
> >>>+            (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
> >>>              /* linearize data for XDP */
> >>>              xdp_page = xdp_linearize_page(rq, &num_buf,
> >>>                                page, offset,
> >>>@@ -1084,17 +1092,26 @@ static struct sk_buff
> >>>*receive_mergeable(struct net_device *dev,
> >>>              if (!xdp_page)
> >>>                  goto err_xdp;
> >>>              offset = VIRTIO_XDP_HEADROOM;
> >>>+        } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
> >>
> >>
> >>I believe we need to check xdp_prog->aux->xdp_has_frags at least
> >>since this may not work if it needs more than one frags?
> >
> >Sorry Jason, I didn't understand you, I'll try to answer. For
> >multi-buffer xdp programs, if the first buffer is a pre-filled
> >buffer (no headroom),
> >we need to copy it out and use the subsequent buffers of this
> >packet as its frags (this is done in
> >virtnet_build_xdp_buff_mrg()), therefore,
> >it seems that there is no need to check
> >'xdp_prog->aux->xdp_has_frags' to mark multi-buffer xdp (of course
> >I can add it),
> >
> >+ } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
> >
> >Because the linearization of single-buffer xdp has all been done
> >before, the subsequent situation can only be applied to
> >multi-buffer xdp:
> >+ if (!xdp_prog->aux->xdp_has_frags &&
> >+ (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
> 
> 
> I basically meant what happens if
> 
> !xdp_prog->aux->xdp_has_frags && num_buf > 2 && headroom <
> virtnet_get_headroom(vi)
> 
> In this case the current code seems to leave the second buffer in
> the frags. This is the case of the buffer size underestimation that
> is mentioned in the comment before (I'd like to keep that).

If I'm not wrong, this case still goes directly into the first 'if' branch.

-               if (unlikely(num_buf > 1 ||
-                            headroom < virtnet_get_headroom(vi))) {
+               if (!xdp_prog->aux->xdp_has_frags &&
+                   (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {

Thanks.

> 
> (And that's why I'm asking to use linearizge_page())
> 
> Thanks
> 
> 
> >
> >>
> >>Btw, I don't see a reason why we can't reuse
> >>xdp_linearize_page(), (we probably don't need error is the
> >>buffer exceeds PAGE_SIZE).
> >
> >For multi-buffer xdp, we only need to copy out the pre-filled
> >first buffer, and use the remaining buffers of this packet as
> >frags in virtnet_build_xdp_buff_mrg().
> >
> >Thanks.
> >
> >>
> >>Other looks good.
> >>
> >>Thanks
> >>
> >>
> >>>+            if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
> >>>+                goto err_xdp;
> >>>+
> >>>+            xdp_page = alloc_page(GFP_ATOMIC);
> >>>+            if (!xdp_page)
> >>>+                goto err_xdp;
> >>>+
> >>>+            memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> >>>+                   page_address(page) + offset, len);
> >>>+            frame_sz = PAGE_SIZE;
> >>>+            offset = VIRTIO_XDP_HEADROOM;
> >>>          } else {
> >>>              xdp_page = page;
> >>>          }
> >>>-
> >>>-        /* Allow consuming headroom but reserve enough space to push
> >>>-         * the descriptor on if we get an XDP_TX return code.
> >>>-         */
> >>>          data = page_address(xdp_page) + offset;
> >>>-        xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> >>>-        xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM +
> >>>vi->hdr_len,
> >>>-                 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> >>>+        err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp,
> >>>data, len, frame_sz,
> >>>+                         &num_buf, &xdp_frags_truesz, stats);
> >>>+        if (unlikely(err))
> >>>+            goto err_xdp_frags;
> >>>            act = bpf_prog_run_xdp(xdp_prog, &xdp);
> >>>          stats->xdp_packets++;
> >>>@@ -1190,6 +1207,17 @@ static struct sk_buff
> >>>*receive_mergeable(struct net_device *dev,
> >>>                  __free_pages(xdp_page, 0);
> >>>              goto err_xdp;
> >>>          }
> >>>+err_xdp_frags:
> >>>+        shinfo = xdp_get_shared_info_from_buff(&xdp);
> >>>+
> >>>+        if (unlikely(xdp_page != page))
> >>>+            __free_pages(xdp_page, 0);
> >>>+
> >>>+        for (i = 0; i < shinfo->nr_frags; i++) {
> >>>+            xdp_page = skb_frag_page(&shinfo->frags[i]);
> >>>+            put_page(xdp_page);
> >>>+        }
> >>>+        goto err_xdp;
> >>>      }
> >>>      rcu_read_unlock();
> >
Jason Wang Dec. 28, 2022, 11:54 a.m. UTC | #5
On Wed, Dec 28, 2022 at 4:23 PM Heng Qi <hengqi@linux.alibaba.com> wrote:
>
> On Wed, Dec 28, 2022 at 02:24:22PM +0800, Jason Wang wrote:
> >
> > 在 2022/12/27 17:31, Heng Qi 写道:
> > >
> > >
> > >在 2022/12/27 下午3:01, Jason Wang 写道:
> > >>
> > >>在 2022/12/20 22:14, Heng Qi 写道:
> > >>>Build multi-buffer xdp using virtnet_build_xdp_buff_mrg().
> > >>>
> > >>>For the prefilled buffer before xdp is set, we will probably use
> > >>>vq reset in the future. At the same time, virtio net currently
> > >>>uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate
> > >>>the tailroom of the last frag, which will involve the offset of the
> > >>>corresponding page and cause a negative value, so we disable tail
> > >>>increase by not setting xdp_rxq->frag_size.
> > >>>
> > >>>Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
> > >>>Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > >>>---
> > >>>  drivers/net/virtio_net.c | 60
> > >>>+++++++++++++++++++++++++++++-----------
> > >>>  1 file changed, 44 insertions(+), 16 deletions(-)
> > >>>
> > >>>diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > >>>index 8fc3b1841d92..40bc58fa57f5 100644
> > >>>--- a/drivers/net/virtio_net.c
> > >>>+++ b/drivers/net/virtio_net.c
> > >>>@@ -1018,6 +1018,7 @@ static struct sk_buff
> > >>>*receive_mergeable(struct net_device *dev,
> > >>>                       unsigned int *xdp_xmit,
> > >>>                       struct virtnet_rq_stats *stats)
> > >>>  {
> > >>>+    unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct
> > >>>skb_shared_info));
> > >>>      struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
> > >>>      u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> > >>>      struct page *page = virt_to_head_page(buf);
> > >>>@@ -1048,11 +1049,14 @@ static struct sk_buff
> > >>>*receive_mergeable(struct net_device *dev,
> > >>>      rcu_read_lock();
> > >>>      xdp_prog = rcu_dereference(rq->xdp_prog);
> > >>>      if (xdp_prog) {
> > >>>+        unsigned int xdp_frags_truesz = 0;
> > >>>+        struct skb_shared_info *shinfo;
> > >>>          struct xdp_frame *xdpf;
> > >>>          struct page *xdp_page;
> > >>>          struct xdp_buff xdp;
> > >>>          void *data;
> > >>>          u32 act;
> > >>>+        int i;
> > >>>            /* Transient failure which in theory could occur if
> > >>>           * in-flight packets from before XDP was enabled reach
> > >>>@@ -1061,19 +1065,23 @@ static struct sk_buff
> > >>>*receive_mergeable(struct net_device *dev,
> > >>>          if (unlikely(hdr->hdr.gso_type))
> > >>>              goto err_xdp;
> > >>>  -        /* Buffers with headroom use PAGE_SIZE as alloc size,
> > >>>-         * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> > >>>+        /* Now the XDP core assumes the frag size is PAGE_SIZE, but
> > >>>+         * buffers with headroom may add a hole in truesize, which
> > >>>+         * makes their length exceed PAGE_SIZE. So we disable the
> > >>>+         * hole mechanism for XDP. See add_recvbuf_mergeable().
> > >>>           */
> > >>>          frame_sz = headroom ? PAGE_SIZE : truesize;
> > >>>  -        /* This happens when rx buffer size is underestimated
> > >>>-         * or headroom is not enough because of the buffer
> > >>>-         * was refilled before XDP is set. This should only
> > >>>-         * happen for the first several packets, so we don't
> > >>>-         * care much about its performance.
> > >>>+        /* This happens when the headroom is not enough because
> > >>>+         * the buffer was prefilled before XDP was set.
> > >>>+         * This should only happen for the first several packets.
> > >>>+         * In fact, vq reset can be used here to help us clean up
> > >>>+         * the prefilled buffers, but many existing devices do not
> > >>>+         * support it, and we don't want to bother users who are
> > >>>+         * using xdp normally.
> > >>>           */
> > >>>-        if (unlikely(num_buf > 1 ||
> > >>>-                 headroom < virtnet_get_headroom(vi))) {
> > >>>+        if (!xdp_prog->aux->xdp_has_frags &&
> > >>>+            (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
> > >>>              /* linearize data for XDP */
> > >>>              xdp_page = xdp_linearize_page(rq, &num_buf,
> > >>>                                page, offset,
> > >>>@@ -1084,17 +1092,26 @@ static struct sk_buff
> > >>>*receive_mergeable(struct net_device *dev,
> > >>>              if (!xdp_page)
> > >>>                  goto err_xdp;
> > >>>              offset = VIRTIO_XDP_HEADROOM;
> > >>>+        } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
> > >>
> > >>
> > >>I believe we need to check xdp_prog->aux->xdp_has_frags at least
> > >>since this may not work if it needs more than one frags?
> > >
> > >Sorry Jason, I didn't understand you, I'll try to answer. For
> > >multi-buffer xdp programs, if the first buffer is a pre-filled
> > >buffer (no headroom),
> > >we need to copy it out and use the subsequent buffers of this
> > >packet as its frags (this is done in
> > >virtnet_build_xdp_buff_mrg()), therefore,
> > >it seems that there is no need to check
> > >'xdp_prog->aux->xdp_has_frags' to mark multi-buffer xdp (of course
> > >I can add it),
> > >
> > >+ } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
> > >
> > >Because the linearization of single-buffer xdp has all been done
> > >before, the subsequent situation can only be applied to
> > >multi-buffer xdp:
> > >+ if (!xdp_prog->aux->xdp_has_frags &&
> > >+ (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
> >
> >
> > I basically meant what happens if
> >
> > !xdp_prog->aux->xdp_has_frags && num_buf > 2 && headroom <
> > virtnet_get_headroom(vi)
> >
> > In this case the current code seems to leave the second buffer in
> > the frags. This is the case of the buffer size underestimation that
> > is mentioned in the comment before (I'd like to keep that).
>
> If I'm not mistaken, this case still goes directly into the first 'if' branch.

My fault, you're right.

Thanks

>
> -               if (unlikely(num_buf > 1 ||
> -                            headroom < virtnet_get_headroom(vi))) {
> +               if (!xdp_prog->aux->xdp_has_frags &&
> +                   (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
>
> Thanks.
>
> >
> > (And that's why I'm asking to use linearizge_page())
> >
> > Thanks
> >
> >
> > >
> > >>
> > >>Btw, I don't see a reason why we can't reuse
> > >>xdp_linearize_page(), (we probably don't need error is the
> > >>buffer exceeds PAGE_SIZE).
> > >
> > >For multi-buffer xdp, we only need to copy out the pre-filled
> > >first buffer, and use the remaining buffers of this packet as
> > >frags in virtnet_build_xdp_buff_mrg().
> > >
> > >Thanks.
> > >
> > >>
> > >>Other looks good.
> > >>
> > >>Thanks
> > >>
> > >>
> > >>>+            if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
> > >>>+                goto err_xdp;
> > >>>+
> > >>>+            xdp_page = alloc_page(GFP_ATOMIC);
> > >>>+            if (!xdp_page)
> > >>>+                goto err_xdp;
> > >>>+
> > >>>+            memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> > >>>+                   page_address(page) + offset, len);
> > >>>+            frame_sz = PAGE_SIZE;
> > >>>+            offset = VIRTIO_XDP_HEADROOM;
> > >>>          } else {
> > >>>              xdp_page = page;
> > >>>          }
> > >>>-
> > >>>-        /* Allow consuming headroom but reserve enough space to push
> > >>>-         * the descriptor on if we get an XDP_TX return code.
> > >>>-         */
> > >>>          data = page_address(xdp_page) + offset;
> > >>>-        xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> > >>>-        xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM +
> > >>>vi->hdr_len,
> > >>>-                 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> > >>>+        err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp,
> > >>>data, len, frame_sz,
> > >>>+                         &num_buf, &xdp_frags_truesz, stats);
> > >>>+        if (unlikely(err))
> > >>>+            goto err_xdp_frags;
> > >>>            act = bpf_prog_run_xdp(xdp_prog, &xdp);
> > >>>          stats->xdp_packets++;
> > >>>@@ -1190,6 +1207,17 @@ static struct sk_buff
> > >>>*receive_mergeable(struct net_device *dev,
> > >>>                  __free_pages(xdp_page, 0);
> > >>>              goto err_xdp;
> > >>>          }
> > >>>+err_xdp_frags:
> > >>>+        shinfo = xdp_get_shared_info_from_buff(&xdp);
> > >>>+
> > >>>+        if (unlikely(xdp_page != page))
> > >>>+            __free_pages(xdp_page, 0);
> > >>>+
> > >>>+        for (i = 0; i < shinfo->nr_frags; i++) {
> > >>>+            xdp_page = skb_frag_page(&shinfo->frags[i]);
> > >>>+            put_page(xdp_page);
> > >>>+        }
> > >>>+        goto err_xdp;
> > >>>      }
> > >>>      rcu_read_unlock();
> > >
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8fc3b1841d92..40bc58fa57f5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1018,6 +1018,7 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 unsigned int *xdp_xmit,
 					 struct virtnet_rq_stats *stats)
 {
+	unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
@@ -1048,11 +1049,14 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
+		unsigned int xdp_frags_truesz = 0;
+		struct skb_shared_info *shinfo;
 		struct xdp_frame *xdpf;
 		struct page *xdp_page;
 		struct xdp_buff xdp;
 		void *data;
 		u32 act;
+		int i;
 
 		/* Transient failure which in theory could occur if
 		 * in-flight packets from before XDP was enabled reach
@@ -1061,19 +1065,23 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
-		/* Buffers with headroom use PAGE_SIZE as alloc size,
-		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		/* Now the XDP core assumes the frag size is PAGE_SIZE, but
+		 * buffers with headroom may add a hole in truesize, which
+		 * makes their length exceed PAGE_SIZE. So we disable the
+		 * hole mechanism for XDP. See add_recvbuf_mergeable().
 		 */
 		frame_sz = headroom ? PAGE_SIZE : truesize;
 
-		/* This happens when rx buffer size is underestimated
-		 * or headroom is not enough because of the buffer
-		 * was refilled before XDP is set. This should only
-		 * happen for the first several packets, so we don't
-		 * care much about its performance.
+		/* This happens when the headroom is not enough because
+		 * the buffer was prefilled before XDP was set.
+		 * This should only happen for the first several packets.
+		 * In fact, vq reset can be used here to help us clean up
+		 * the prefilled buffers, but many existing devices do not
+		 * support it, and we don't want to bother users who are
+		 * using xdp normally.
 		 */
-		if (unlikely(num_buf > 1 ||
-			     headroom < virtnet_get_headroom(vi))) {
+		if (!xdp_prog->aux->xdp_has_frags &&
+		    (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
 			xdp_page = xdp_linearize_page(rq, &num_buf,
 						      page, offset,
@@ -1084,17 +1092,26 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 			if (!xdp_page)
 				goto err_xdp;
 			offset = VIRTIO_XDP_HEADROOM;
+		} else if (unlikely(headroom < virtnet_get_headroom(vi))) {
+			if ((VIRTIO_XDP_HEADROOM + len + tailroom) > PAGE_SIZE)
+				goto err_xdp;
+
+			xdp_page = alloc_page(GFP_ATOMIC);
+			if (!xdp_page)
+				goto err_xdp;
+
+			memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
+			       page_address(page) + offset, len);
+			frame_sz = PAGE_SIZE;
+			offset = VIRTIO_XDP_HEADROOM;
 		} else {
 			xdp_page = page;
 		}
-
-		/* Allow consuming headroom but reserve enough space to push
-		 * the descriptor on if we get an XDP_TX return code.
-		 */
 		data = page_address(xdp_page) + offset;
-		xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
-		xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
-				 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
+		err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
+						 &num_buf, &xdp_frags_truesz, stats);
+		if (unlikely(err))
+			goto err_xdp_frags;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
@@ -1190,6 +1207,17 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 				__free_pages(xdp_page, 0);
 			goto err_xdp;
 		}
+err_xdp_frags:
+		shinfo = xdp_get_shared_info_from_buff(&xdp);
+
+		if (unlikely(xdp_page != page))
+			__free_pages(xdp_page, 0);
+
+		for (i = 0; i < shinfo->nr_frags; i++) {
+			xdp_page = skb_frag_page(&shinfo->frags[i]);
+			put_page(xdp_page);
+		}
+		goto err_xdp;
 	}
 	rcu_read_unlock();