Message ID | 20240130114224.86536-3-xuanzhuo@linux.alibaba.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio: drivers maintain dma info for premapped vq | expand |
On Tue, Jan 30, 2024 at 7:42 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote: > > In the functions vring_unmap_extra_packed and vring_unmap_desc_packed, > multiple checks are made whether unmap is performed and whether it is > INDIRECT. > > These two functions are usually called in a loop, and we should put the > check outside the loop. > > And we unmap the descs with VRING_DESC_F_INDIRECT on the same path with > other descs, that make the thing more complex. If we distinguish the > descs with VRING_DESC_F_INDIRECT before unmap, thing will be clearer. > > 1. only one desc of the desc table is used, we do not need the loop > 2. the called unmap api is difference from the other desc > 3. the vq->premapped is not needed to check > 4. the vq->indirect is not needed to check > 5. the state->indir_desc must not be null > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> > --- > drivers/virtio/virtio_ring.c | 76 ++++++++++++++++++------------------ > 1 file changed, 39 insertions(+), 37 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 4677831e6c26..7280a1706cca 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -1220,6 +1220,7 @@ static u16 packed_last_used(u16 last_used_idx) > return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); > } > > +/* caller must check vring_need_unmap_buffer() */ > static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > const struct vring_desc_extra *extra) > { > @@ -1227,33 +1228,18 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > > flags = extra->flags; > > - if (flags & VRING_DESC_F_INDIRECT) { > - if (!vq->use_dma_api) > - return; > - > - dma_unmap_single(vring_dma_dev(vq), > - extra->addr, extra->len, > - (flags & VRING_DESC_F_WRITE) ? 
> - DMA_FROM_DEVICE : DMA_TO_DEVICE); > - } else { > - if (!vring_need_unmap_buffer(vq)) > - return; > - > - dma_unmap_page(vring_dma_dev(vq), > - extra->addr, extra->len, > - (flags & VRING_DESC_F_WRITE) ? > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > - } > + dma_unmap_page(vring_dma_dev(vq), > + extra->addr, extra->len, > + (flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > } > > +/* caller must check vring_need_unmap_buffer() */ > static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, > const struct vring_packed_desc *desc) > { > u16 flags; > > - if (!vring_need_unmap_buffer(vq)) > - return; > - > flags = le16_to_cpu(desc->flags); > > dma_unmap_page(vring_dma_dev(vq), > @@ -1329,7 +1315,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > total_sg * sizeof(struct vring_packed_desc), > DMA_TO_DEVICE); > if (vring_mapping_error(vq, addr)) { > - if (vq->premapped) > + if (!vring_need_unmap_buffer(vq)) > goto free_desc; > > goto unmap_release; > @@ -1344,10 +1330,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > vq->packed.desc_extra[id].addr = addr; > vq->packed.desc_extra[id].len = total_sg * > sizeof(struct vring_packed_desc); > - vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > - vq->packed.avail_used_flags; > } > > + vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > + vq->packed.avail_used_flags; Is this a bug fix? Or if we only need to check _F_INDIRECT, we can simply avoid doing this by checking vq->indirect && state->indir_desc? > + > /* > * A driver MUST NOT make the first descriptor in the list > * available before all subsequent descriptors comprising > @@ -1388,6 +1375,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > unmap_release: > err_idx = i; > > + WARN_ON(!vring_need_unmap_buffer(vq)); Nitpick, using BUG_ON might be better as it may lead to unexpected results which we can't recover from. 
> + > for (i = 0; i < err_idx; i++) > vring_unmap_desc_packed(vq, &desc[i]); > > @@ -1484,9 +1473,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, > if (unlikely(vring_need_unmap_buffer(vq))) { > vq->packed.desc_extra[curr].addr = addr; > vq->packed.desc_extra[curr].len = sg->length; > - vq->packed.desc_extra[curr].flags = > - le16_to_cpu(flags); > } > + > + vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); > + > prev = curr; > curr = vq->packed.desc_extra[curr].next; > > @@ -1536,6 +1526,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, > > vq->packed.avail_used_flags = avail_used_flags; > > + WARN_ON(!vring_need_unmap_buffer(vq)); > + > for (n = 0; n < total_sg; n++) { > if (i == err_idx) > break; > @@ -1605,7 +1597,9 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > struct vring_desc_state_packed *state = NULL; > struct vring_packed_desc *desc; > unsigned int i, curr; > + u16 flags; > > + flags = vq->packed.desc_extra[id].flags; > state = &vq->packed.desc_state[id]; > > /* Clear data ptr. 
*/ > @@ -1615,22 +1609,32 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > vq->free_head = id; > vq->vq.num_free += state->num; > > - if (unlikely(vring_need_unmap_buffer(vq))) { > - curr = id; > - for (i = 0; i < state->num; i++) { > - vring_unmap_extra_packed(vq, > - &vq->packed.desc_extra[curr]); > - curr = vq->packed.desc_extra[curr].next; > + if (!(flags & VRING_DESC_F_INDIRECT)) { > + if (vring_need_unmap_buffer(vq)) { > + curr = id; > + for (i = 0; i < state->num; i++) { > + vring_unmap_extra_packed(vq, > + &vq->packed.desc_extra[curr]); > + curr = vq->packed.desc_extra[curr].next; > + } So before the change, we had: if (unlikely(vq->do_unmap)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; } } This looks like a bug, as we should unmap the indirect descriptor regardless of whether do_unmap is true or false. If so, shouldn't this be an independent fix instead of being squashed into this patch? > } > - } > > - if (vq->indirect) { > + if (ctx) > + *ctx = state->indir_desc; > + } else { > + const struct vring_desc_extra *extra; > u32 len; > > + if (vq->use_dma_api) { > + extra = &vq->packed.desc_extra[id]; > + dma_unmap_single(vring_dma_dev(vq), > + extra->addr, extra->len, > + (flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > + } > + Thanks
On Wed, 31 Jan 2024 17:12:19 +0800, Jason Wang <jasowang@redhat.com> wrote: > On Tue, Jan 30, 2024 at 7:42 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote: > > > > In the functions vring_unmap_extra_packed and vring_unmap_desc_packed, > > multiple checks are made whether unmap is performed and whether it is > > INDIRECT. > > > > These two functions are usually called in a loop, and we should put the > > check outside the loop. > > > > And we unmap the descs with VRING_DESC_F_INDIRECT on the same path with > > other descs, that make the thing more complex. If we distinguish the > > descs with VRING_DESC_F_INDIRECT before unmap, thing will be clearer. > > > > 1. only one desc of the desc table is used, we do not need the loop > > 2. the called unmap api is difference from the other desc > > 3. the vq->premapped is not needed to check > > 4. the vq->indirect is not needed to check > > 5. the state->indir_desc must not be null > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> > > --- > > drivers/virtio/virtio_ring.c | 76 ++++++++++++++++++------------------ > > 1 file changed, 39 insertions(+), 37 deletions(-) > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > index 4677831e6c26..7280a1706cca 100644 > > --- a/drivers/virtio/virtio_ring.c > > +++ b/drivers/virtio/virtio_ring.c > > @@ -1220,6 +1220,7 @@ static u16 packed_last_used(u16 last_used_idx) > > return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); > > } > > > > +/* caller must check vring_need_unmap_buffer() */ > > static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > > const struct vring_desc_extra *extra) > > { > > @@ -1227,33 +1228,18 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > > > > flags = extra->flags; > > > > - if (flags & VRING_DESC_F_INDIRECT) { > > - if (!vq->use_dma_api) > > - return; > > - > > - dma_unmap_single(vring_dma_dev(vq), > > - extra->addr, extra->len, > > - (flags & 
VRING_DESC_F_WRITE) ? > > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > > - } else { > > - if (!vring_need_unmap_buffer(vq)) > > - return; > > - > > - dma_unmap_page(vring_dma_dev(vq), > > - extra->addr, extra->len, > > - (flags & VRING_DESC_F_WRITE) ? > > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > > - } > > + dma_unmap_page(vring_dma_dev(vq), > > + extra->addr, extra->len, > > + (flags & VRING_DESC_F_WRITE) ? > > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > > } > > > > +/* caller must check vring_need_unmap_buffer() */ > > static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, > > const struct vring_packed_desc *desc) > > { > > u16 flags; > > > > - if (!vring_need_unmap_buffer(vq)) > > - return; > > - > > flags = le16_to_cpu(desc->flags); > > > > dma_unmap_page(vring_dma_dev(vq), > > @@ -1329,7 +1315,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > > total_sg * sizeof(struct vring_packed_desc), > > DMA_TO_DEVICE); > > if (vring_mapping_error(vq, addr)) { > > - if (vq->premapped) > > + if (!vring_need_unmap_buffer(vq)) > > goto free_desc; > > > > goto unmap_release; > > @@ -1344,10 +1330,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > > vq->packed.desc_extra[id].addr = addr; > > vq->packed.desc_extra[id].len = total_sg * > > sizeof(struct vring_packed_desc); > > - vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > > - vq->packed.avail_used_flags; > > } > > > > + vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > > + vq->packed.avail_used_flags; > > Is this a bug fix? Or if we only need to check _F_INDIRECT, we can > simply avoid doing this by checking vq->indirect && state->indir_desc? 
> > > + > > /* > > * A driver MUST NOT make the first descriptor in the list > > * available before all subsequent descriptors comprising > > @@ -1388,6 +1375,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > > unmap_release: > > err_idx = i; > > > > + WARN_ON(!vring_need_unmap_buffer(vq)); > > Nitpick, using BUG_ON might be better as it may lead to unexpected > results which we can't recover from. checkpatch.pl does not like BUG_ON. I have no preference. > > > + > > for (i = 0; i < err_idx; i++) > > vring_unmap_desc_packed(vq, &desc[i]); > > > > @@ -1484,9 +1473,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, > > if (unlikely(vring_need_unmap_buffer(vq))) { > > vq->packed.desc_extra[curr].addr = addr; > > vq->packed.desc_extra[curr].len = sg->length; > > - vq->packed.desc_extra[curr].flags = > > - le16_to_cpu(flags); > > } > > + > > + vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); > > + > > prev = curr; > > curr = vq->packed.desc_extra[curr].next; > > > > @@ -1536,6 +1526,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, > > > > vq->packed.avail_used_flags = avail_used_flags; > > > > + WARN_ON(!vring_need_unmap_buffer(vq)); > > + > > for (n = 0; n < total_sg; n++) { > > if (i == err_idx) > > break; > > @@ -1605,7 +1597,9 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > > struct vring_desc_state_packed *state = NULL; > > struct vring_packed_desc *desc; > > unsigned int i, curr; > > + u16 flags; > > > > + flags = vq->packed.desc_extra[id].flags; > > state = &vq->packed.desc_state[id]; > > > > /* Clear data ptr.
*/ > > @@ -1615,22 +1609,32 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > > vq->free_head = id; > > vq->vq.num_free += state->num; > > > > - if (unlikely(vring_need_unmap_buffer(vq))) { > > - curr = id; > > - for (i = 0; i < state->num; i++) { > > - vring_unmap_extra_packed(vq, > > - &vq->packed.desc_extra[curr]); > > - curr = vq->packed.desc_extra[curr].next; > > + if (!(flags & VRING_DESC_F_INDIRECT)) { > > + if (vring_need_unmap_buffer(vq)) { > > + curr = id; > > + for (i = 0; i < state->num; i++) { > > + vring_unmap_extra_packed(vq, > > + &vq->packed.desc_extra[curr]); > > + curr = vq->packed.desc_extra[curr].next; > > + } > > So before the change, we had: > > if (unlikely(vq->do_unmap)) { > curr = id; > for (i = 0; i < state->num; i++) { > vring_unmap_extra_packed(vq, > &vq->packed.desc_extra[curr]); > curr = vq->packed.desc_extra[curr].next; > } > } > > This looks like a bug as we should unmap the indirect descriptor > regradless of whether do_unmap is true or false. > > If yes, we need a independent fix instead of squashing it in this patch? YES. I noticed this. I will post a fix to the stable branch. Thanks. > > > } > > - } > > > > - if (vq->indirect) { > > + if (ctx) > > + *ctx = state->indir_desc; > > + } else { > > + const struct vring_desc_extra *extra; > > u32 len; > > > > + if (vq->use_dma_api) { > > + extra = &vq->packed.desc_extra[id]; > > + dma_unmap_single(vring_dma_dev(vq), > > + extra->addr, extra->len, > > + (flags & VRING_DESC_F_WRITE) ? > > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > > + } > > + > > Thanks >
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 4677831e6c26..7280a1706cca 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1220,6 +1220,7 @@ static u16 packed_last_used(u16 last_used_idx) return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } +/* caller must check vring_need_unmap_buffer() */ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, const struct vring_desc_extra *extra) { @@ -1227,33 +1228,18 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, flags = extra->flags; - if (flags & VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) - return; - - dma_unmap_single(vring_dma_dev(vq), - extra->addr, extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - if (!vring_need_unmap_buffer(vq)) - return; - - dma_unmap_page(vring_dma_dev(vq), - extra->addr, extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + dma_unmap_page(vring_dma_dev(vq), + extra->addr, extra->len, + (flags & VRING_DESC_F_WRITE) ? 
+ DMA_FROM_DEVICE : DMA_TO_DEVICE); } +/* caller must check vring_need_unmap_buffer() */ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, const struct vring_packed_desc *desc) { u16 flags; - if (!vring_need_unmap_buffer(vq)) - return; - flags = le16_to_cpu(desc->flags); dma_unmap_page(vring_dma_dev(vq), @@ -1329,7 +1315,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) { - if (vq->premapped) + if (!vring_need_unmap_buffer(vq)) goto free_desc; goto unmap_release; @@ -1344,10 +1330,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); - vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | - vq->packed.avail_used_flags; } + vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | + vq->packed.avail_used_flags; + /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising @@ -1388,6 +1375,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unmap_release: err_idx = i; + WARN_ON(!vring_need_unmap_buffer(vq)); + for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); @@ -1484,9 +1473,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, if (unlikely(vring_need_unmap_buffer(vq))) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; - vq->packed.desc_extra[curr].flags = - le16_to_cpu(flags); } + + vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); + prev = curr; curr = vq->packed.desc_extra[curr].next; @@ -1536,6 +1526,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.avail_used_flags = avail_used_flags; + WARN_ON(!vring_need_unmap_buffer(vq)); + for (n = 0; n < total_sg; n++) { if (i == err_idx) break; @@ -1605,7 +1597,9 
@@ static void detach_buf_packed(struct vring_virtqueue *vq, struct vring_desc_state_packed *state = NULL; struct vring_packed_desc *desc; unsigned int i, curr; + u16 flags; + flags = vq->packed.desc_extra[id].flags; state = &vq->packed.desc_state[id]; /* Clear data ptr. */ @@ -1615,22 +1609,32 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vring_need_unmap_buffer(vq))) { - curr = id; - for (i = 0; i < state->num; i++) { - vring_unmap_extra_packed(vq, - &vq->packed.desc_extra[curr]); - curr = vq->packed.desc_extra[curr].next; + if (!(flags & VRING_DESC_F_INDIRECT)) { + if (vring_need_unmap_buffer(vq)) { + curr = id; + for (i = 0; i < state->num; i++) { + vring_unmap_extra_packed(vq, + &vq->packed.desc_extra[curr]); + curr = vq->packed.desc_extra[curr].next; + } } - } - if (vq->indirect) { + if (ctx) + *ctx = state->indir_desc; + } else { + const struct vring_desc_extra *extra; u32 len; + if (vq->use_dma_api) { + extra = &vq->packed.desc_extra[id]; + dma_unmap_single(vring_dma_dev(vq), + extra->addr, extra->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } + /* Free the indirect table, if any, now that it's unmapped. */ desc = state->indir_desc; - if (!desc) - return; if (vring_need_unmap_buffer(vq)) { len = vq->packed.desc_extra[id].len; @@ -1640,8 +1644,6 @@ static void detach_buf_packed(struct vring_virtqueue *vq, } kfree(desc); state->indir_desc = NULL; - } else if (ctx) { - *ctx = state->indir_desc; } }
In the functions vring_unmap_extra_packed and vring_unmap_desc_packed, multiple checks are made for whether the unmap should be performed and whether the desc is INDIRECT. These two functions are usually called in a loop, so we should move the checks outside the loop. In addition, we unmap the descs with VRING_DESC_F_INDIRECT on the same path as the other descs, which makes things more complex. If we distinguish the descs with VRING_DESC_F_INDIRECT before unmapping, things become clearer: 1. only one desc of the desc table is used, so we do not need the loop 2. the unmap API that is called is different from the one used for the other descs 3. vq->premapped does not need to be checked 4. vq->indirect does not need to be checked 5. state->indir_desc must not be NULL Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 76 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 37 deletions(-)