diff mbox series

[4/8] ublk: add segment parameter

Message ID 20250324134905.766777-5-ming.lei@redhat.com (mailing list archive)
State New
Headers show
Series ublk: cleanup & improvement & zc follow-up | expand

Checks

Context Check Description
shin/vmtest-for-next-PR success PR summary
shin/vmtest-for-next-VM_Test-1 success Logs for build-kernel
shin/vmtest-for-next-VM_Test-0 success Logs for build-kernel

Commit Message

Ming Lei March 24, 2025, 1:48 p.m. UTC
IO split is usually bad in the io_uring world, since it causes -EAGAIN and
IO handling may have to fall back to io-wq, which hurts performance.

ublk recently started to support zero copy. To avoid unnecessary IO
splits, the ublk driver's segment limit should be aligned with the backend
device's segment limit.

Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
whose number corresponds to the ublk request's segment count, so big
memory allocations can be avoided by setting a reasonable max_segments limit.

So add a segment parameter to give the ublk server a chance to align the
segment limit with the backend, and to keep it reasonable from an
implementation viewpoint.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c      | 15 ++++++++++++++-
 include/uapi/linux/ublk_cmd.h |  9 +++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

Comments

Caleb Sander Mateos March 24, 2025, 10:26 p.m. UTC | #1
On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@redhat.com> wrote:
>
> IO split is usually bad in io_uring world, since -EAGAIN is caused and
> IO handling may have to fallback to io-wq, this way does hurt performance.
>
> ublk starts to support zero copy recently, for avoiding unnecessary IO
> split, ublk driver's segment limit should be aligned with backend
> device's segment limit.
>
> Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
> which number is aligned with ublk request segment number, so that big
> memory allocation can be avoided by setting reasonable max_segments limit.
>
> So add segment parameter for providing ublk server chance to align
> segment limit with backend, and keep it reasonable from implementation
> viewpoint.
>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/block/ublk_drv.c      | 15 ++++++++++++++-
>  include/uapi/linux/ublk_cmd.h |  9 +++++++++
>  2 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index acb6aed7be75..53a463681a41 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -74,7 +74,7 @@
>  #define UBLK_PARAM_TYPE_ALL                                \
>         (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
>          UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
> -        UBLK_PARAM_TYPE_DMA_ALIGN)
> +        UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
>
>  struct ublk_rq_data {
>         struct kref ref;
> @@ -580,6 +580,13 @@ static int ublk_validate_params(const struct ublk_device *ub)
>                         return -EINVAL;
>         }
>
> +       if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
> +               const struct ublk_param_segment *p = &ub->params.seg;
> +
> +               if (!is_power_of_2(p->seg_boundary_mask + 1))
> +                       return -EINVAL;

Looking at blk_validate_limits(), it seems like there are some
additional requirements? Looks like seg_boundary_mask has to be at
least PAGE_SIZE - 1 and max_segment_size has to be at least PAGE_SIZE
if virt_boundary_mask is set?

Aside from that, this looks good to me.

Best,
Caleb

> +       }
> +
>         return 0;
>  }
>
> @@ -2350,6 +2357,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
>         if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
>                 lim.dma_alignment = ub->params.dma.alignment;
>
> +       if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
> +               lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask;
> +               lim.max_segment_size = ub->params.seg.max_segment_size;
> +               lim.max_segments = ub->params.seg.max_segments;
> +       }
> +
>         if (wait_for_completion_interruptible(&ub->completion) != 0)
>                 return -EINTR;
>
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index 7255b36b5cf6..83c2b94251f0 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -410,6 +410,13 @@ struct ublk_param_dma_align {
>         __u8    pad[4];
>  };
>
> +struct ublk_param_segment {
> +       __u64   seg_boundary_mask;
> +       __u32   max_segment_size;
> +       __u16   max_segments;
> +       __u8    pad[2];
> +};
> +
>  struct ublk_params {
>         /*
>          * Total length of parameters, userspace has to set 'len' for both
> @@ -423,6 +430,7 @@ struct ublk_params {
>  #define UBLK_PARAM_TYPE_DEVT            (1 << 2)
>  #define UBLK_PARAM_TYPE_ZONED           (1 << 3)
>  #define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4)
> +#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)
>         __u32   types;                  /* types of parameter included */
>
>         struct ublk_param_basic         basic;
> @@ -430,6 +438,7 @@ struct ublk_params {
>         struct ublk_param_devt          devt;
>         struct ublk_param_zoned zoned;
>         struct ublk_param_dma_align     dma;
> +       struct ublk_param_segment       seg;
>  };
>
>  #endif
> --
> 2.47.0
>
Ming Lei March 25, 2025, 1:15 a.m. UTC | #2
On Mon, Mar 24, 2025 at 03:26:06PM -0700, Caleb Sander Mateos wrote:
> On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > IO split is usually bad in io_uring world, since -EAGAIN is caused and
> > IO handling may have to fallback to io-wq, this way does hurt performance.
> >
> > ublk starts to support zero copy recently, for avoiding unnecessary IO
> > split, ublk driver's segment limit should be aligned with backend
> > device's segment limit.
> >
> > Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
> > which number is aligned with ublk request segment number, so that big
> > memory allocation can be avoided by setting reasonable max_segments limit.
> >
> > So add segment parameter for providing ublk server chance to align
> > segment limit with backend, and keep it reasonable from implementation
> > viewpoint.
> >
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/block/ublk_drv.c      | 15 ++++++++++++++-
> >  include/uapi/linux/ublk_cmd.h |  9 +++++++++
> >  2 files changed, 23 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index acb6aed7be75..53a463681a41 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -74,7 +74,7 @@
> >  #define UBLK_PARAM_TYPE_ALL                                \
> >         (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
> >          UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
> > -        UBLK_PARAM_TYPE_DMA_ALIGN)
> > +        UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
> >
> >  struct ublk_rq_data {
> >         struct kref ref;
> > @@ -580,6 +580,13 @@ static int ublk_validate_params(const struct ublk_device *ub)
> >                         return -EINVAL;
> >         }
> >
> > +       if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
> > +               const struct ublk_param_segment *p = &ub->params.seg;
> > +
> > +               if (!is_power_of_2(p->seg_boundary_mask + 1))
> > +                       return -EINVAL;
> 
> Looking at blk_validate_limits(), it seems like there are some
> additional requirements? Looks like seg_boundary_mask has to be at
> least PAGE_SIZE - 1

Yeah, it isn't done in ublk because the block layer runs the check, and
starting the device will fail. That is, we rely on the block layer's
default setting, which isn't good from a UAPI viewpoint, since the block
layer may change the default setting.

Also, it is bad to tie a device property to PAGE_SIZE, which is actually
variable. The latest kernel has replaced PAGE_SIZE with 4096 for segment
limits.

I think we can take 4096 for validation here.

> and max_segment_size has to be at least PAGE_SIZE
> if virt_boundary_mask is set?

If virt_boundary_mask is set, max_segment_size will usually be ignored,
except for some stacking devices.


Thanks,
Ming
Caleb Sander Mateos March 25, 2025, 7:43 p.m. UTC | #3
On Mon, Mar 24, 2025 at 6:16 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> On Mon, Mar 24, 2025 at 03:26:06PM -0700, Caleb Sander Mateos wrote:
> > On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@redhat.com> wrote:
> > >
> > > IO split is usually bad in io_uring world, since -EAGAIN is caused and
> > > IO handling may have to fallback to io-wq, this way does hurt performance.
> > >
> > > ublk starts to support zero copy recently, for avoiding unnecessary IO
> > > split, ublk driver's segment limit should be aligned with backend
> > > device's segment limit.
> > >
> > > Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
> > > which number is aligned with ublk request segment number, so that big
> > > memory allocation can be avoided by setting reasonable max_segments limit.
> > >
> > > So add segment parameter for providing ublk server chance to align
> > > segment limit with backend, and keep it reasonable from implementation
> > > viewpoint.
> > >
> > > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > > ---
> > >  drivers/block/ublk_drv.c      | 15 ++++++++++++++-
> > >  include/uapi/linux/ublk_cmd.h |  9 +++++++++
> > >  2 files changed, 23 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > > index acb6aed7be75..53a463681a41 100644
> > > --- a/drivers/block/ublk_drv.c
> > > +++ b/drivers/block/ublk_drv.c
> > > @@ -74,7 +74,7 @@
> > >  #define UBLK_PARAM_TYPE_ALL                                \
> > >         (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
> > >          UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
> > > -        UBLK_PARAM_TYPE_DMA_ALIGN)
> > > +        UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
> > >
> > >  struct ublk_rq_data {
> > >         struct kref ref;
> > > @@ -580,6 +580,13 @@ static int ublk_validate_params(const struct ublk_device *ub)
> > >                         return -EINVAL;
> > >         }
> > >
> > > +       if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
> > > +               const struct ublk_param_segment *p = &ub->params.seg;
> > > +
> > > +               if (!is_power_of_2(p->seg_boundary_mask + 1))
> > > +                       return -EINVAL;
> >
> > Looking at blk_validate_limits(), it seems like there are some
> > additional requirements? Looks like seg_boundary_mask has to be at
> > least PAGE_SIZE - 1
>
> Yeah, it isn't done in ublk because block layer runs the check, and it
> will be failed when starting the device. That said we take block layer's
> default setting, which isn't good from UAPI viewpoint, since block
> layer may change the default setting.

Even though blk_validate_limits() rejects it, it appears to log a
warning. That seems undesirable for something controllable from
userspace.
/*
 * By default there is no limit on the segment boundary alignment,
 * but if there is one it can't be smaller than the page size as
 * that would break all the normal I/O patterns.
 */
if (!lim->seg_boundary_mask)
        lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
        return -EINVAL;

>
> Also it is bad to associate device property with PAGE_SIZE which is
> a variable actually. The latest kernel has replaced PAGE_SIZE with 4096
> for segment limits.
>
> I think we can take 4096 for validation here.
>
> > and max_segment_size has to be at least PAGE_SIZE
> > if virt_boundary_mask is set?
>
> If virt_boundary_mask is set, max_segment_size will be ignored usually
> except for some stacking devices.

Sorry, I had it backwards. The requirement is if virt_boundary_mask is
*not* set:
/*
 * Stacking device may have both virtual boundary and max segment
 * size limit, so allow this setting now, and long-term the two
 * might need to move out of stacking limits since we have immutable
 * bvec and lower layer bio splitting is supposed to handle the two
 * correctly.
 */
if (lim->virt_boundary_mask) {
        if (!lim->max_segment_size)
                lim->max_segment_size = UINT_MAX;
} else {
        /*
         * The maximum segment size has an odd historic 64k default that
         * drivers probably should override.  Just like the I/O size we
         * require drivers to at least handle a full page per segment.
         */
        if (!lim->max_segment_size)
                lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
        if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
                return -EINVAL;
}

Best,
Caleb
Ming Lei March 26, 2025, 2:17 a.m. UTC | #4
On Tue, Mar 25, 2025 at 12:43:26PM -0700, Caleb Sander Mateos wrote:
> On Mon, Mar 24, 2025 at 6:16 PM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > On Mon, Mar 24, 2025 at 03:26:06PM -0700, Caleb Sander Mateos wrote:
> > > On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@redhat.com> wrote:
> > > >
> > > > IO split is usually bad in io_uring world, since -EAGAIN is caused and
> > > > IO handling may have to fallback to io-wq, this way does hurt performance.
> > > >
> > > > ublk starts to support zero copy recently, for avoiding unnecessary IO
> > > > split, ublk driver's segment limit should be aligned with backend
> > > > device's segment limit.
> > > >
> > > > Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
> > > > which number is aligned with ublk request segment number, so that big
> > > > memory allocation can be avoided by setting reasonable max_segments limit.
> > > >
> > > > So add segment parameter for providing ublk server chance to align
> > > > segment limit with backend, and keep it reasonable from implementation
> > > > viewpoint.
> > > >
> > > > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > > > ---
> > > >  drivers/block/ublk_drv.c      | 15 ++++++++++++++-
> > > >  include/uapi/linux/ublk_cmd.h |  9 +++++++++
> > > >  2 files changed, 23 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > > > index acb6aed7be75..53a463681a41 100644
> > > > --- a/drivers/block/ublk_drv.c
> > > > +++ b/drivers/block/ublk_drv.c
> > > > @@ -74,7 +74,7 @@
> > > >  #define UBLK_PARAM_TYPE_ALL                                \
> > > >         (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
> > > >          UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
> > > > -        UBLK_PARAM_TYPE_DMA_ALIGN)
> > > > +        UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
> > > >
> > > >  struct ublk_rq_data {
> > > >         struct kref ref;
> > > > @@ -580,6 +580,13 @@ static int ublk_validate_params(const struct ublk_device *ub)
> > > >                         return -EINVAL;
> > > >         }
> > > >
> > > > +       if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
> > > > +               const struct ublk_param_segment *p = &ub->params.seg;
> > > > +
> > > > +               if (!is_power_of_2(p->seg_boundary_mask + 1))
> > > > +                       return -EINVAL;
> > >
> > > Looking at blk_validate_limits(), it seems like there are some
> > > additional requirements? Looks like seg_boundary_mask has to be at
> > > least PAGE_SIZE - 1
> >
> > Yeah, it isn't done in ublk because block layer runs the check, and it
> > will be failed when starting the device. That said we take block layer's
> > default setting, which isn't good from UAPI viewpoint, since block
> > layer may change the default setting.
> 
> Even though blk_validate_limits() rejects it, it appears to log a
> warning. That seems undesirable for something controllable from
> userspace.
> /*
>  * By default there is no limit on the segment boundary alignment,
>  * but if there is one it can't be smaller than the page size as
>  * that would break all the normal I/O patterns.
>  */
> if (!lim->seg_boundary_mask)
>         lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
> if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
>         return -EINVAL;

Yes, it has been addressed in my local version, and we need to make it
a hw/sw interface.

> 
> >
> > Also it is bad to associate device property with PAGE_SIZE which is
> > a variable actually. The latest kernel has replaced PAGE_SIZE with 4096
> > for segment limits.
> >
> > I think we can take 4096 for validation here.
> >
> > > and max_segment_size has to be at least PAGE_SIZE
> > > if virt_boundary_mask is set?
> >
> > If virt_boundary_mask is set, max_segment_size will be ignored usually
> > except for some stacking devices.
> 
> Sorry, I had it backwards. The requirement is if virt_boundary_mask is
> *not* set:
> /*
>  * Stacking device may have both virtual boundary and max segment
>  * size limit, so allow this setting now, and long-term the two
>  * might need to move out of stacking limits since we have immutable
>  * bvec and lower layer bio splitting is supposed to handle the two
>  * correctly.
>  */
> if (lim->virt_boundary_mask) {
>         if (!lim->max_segment_size)
>                 lim->max_segment_size = UINT_MAX;
> } else {
>         /*
>          * The maximum segment size has an odd historic 64k default that
>          * drivers probably should override.  Just like the I/O size we
>          * require drivers to at least handle a full page per segment.
>          */
>         if (!lim->max_segment_size)
>                 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
>         if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
>                 return -EINVAL;
> }

Right.

Please feel free to see if the revised patch is good:


From 0718b9f130b3bc9b9b06907c687fb5b9eea172f7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 24 Mar 2025 12:33:59 +0000
Subject: [PATCH V2 3/8] ublk: add segment parameter

IO split is usually bad in the io_uring world, since it causes -EAGAIN and
IO handling may have to fall back to io-wq, which hurts performance.

ublk recently started to support zero copy. To avoid unnecessary IO
splits, the ublk driver's segment limit should be aligned with the backend
device's segment limit.

Another reason is that io_buffer_register_bvec() needs to allocate bvecs,
whose number corresponds to the ublk request's segment count, so big
memory allocations can be avoided by setting a reasonable max_segments limit.

So add a segment parameter to give the ublk server a chance to align the
segment limit with the backend, and to keep it reasonable from an
implementation viewpoint.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c      | 20 +++++++++++++++++++-
 include/uapi/linux/ublk_cmd.h | 21 +++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6fa1384c6436..6367476cef2b 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -74,7 +74,7 @@
 #define UBLK_PARAM_TYPE_ALL                                \
 	(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
 	 UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
-	 UBLK_PARAM_TYPE_DMA_ALIGN)
+	 UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
 
 struct ublk_rq_data {
 	struct kref ref;
@@ -580,6 +580,18 @@ static int ublk_validate_params(const struct ublk_device *ub)
 			return -EINVAL;
 	}
 
+	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
+		const struct ublk_param_segment *p = &ub->params.seg;
+
+		if (!is_power_of_2(p->seg_boundary_mask + 1))
+			return -EINVAL;
+
+		if (p->seg_boundary_mask + 1 < UBLK_MIN_SEGMENT_SIZE)
+			return -EINVAL;
+		if (p->max_segment_size < UBLK_MIN_SEGMENT_SIZE)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -2346,6 +2358,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 	if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
 		lim.dma_alignment = ub->params.dma.alignment;
 
+	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
+		lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask;
+		lim.max_segment_size = ub->params.seg.max_segment_size;
+		lim.max_segments = ub->params.seg.max_segments;
+	}
+
 	if (wait_for_completion_interruptible(&ub->completion) != 0)
 		return -EINTR;
 
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 7255b36b5cf6..ffa805b05141 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -410,6 +410,25 @@ struct ublk_param_dma_align {
 	__u8	pad[4];
 };
 
+#define UBLK_MIN_SEGMENT_SIZE   4096
+struct ublk_param_segment {
+	/*
+	 * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has
+	 * to be >= UBLK_MIN_SEGMENT_SIZE(4096)
+	 */
+	__u64 	seg_boundary_mask;
+
+	/*
+	 * max_segment_size could be overridden by virt_boundary_mask, so be
+	 * careful when setting both.
+	 *
+	 * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096)
+	 */
+	__u32 	max_segment_size;
+	__u16 	max_segments;
+	__u8	pad[2];
+};
+
 struct ublk_params {
 	/*
 	 * Total length of parameters, userspace has to set 'len' for both
@@ -423,6 +442,7 @@ struct ublk_params {
 #define UBLK_PARAM_TYPE_DEVT            (1 << 2)
 #define UBLK_PARAM_TYPE_ZONED           (1 << 3)
 #define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4)
+#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)
 	__u32	types;			/* types of parameter included */
 
 	struct ublk_param_basic		basic;
@@ -430,6 +450,7 @@ struct ublk_params {
 	struct ublk_param_devt		devt;
 	struct ublk_param_zoned	zoned;
 	struct ublk_param_dma_align	dma;
+	struct ublk_param_segment	seg;
 };
 
 #endif
diff mbox series

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index acb6aed7be75..53a463681a41 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -74,7 +74,7 @@ 
 #define UBLK_PARAM_TYPE_ALL                                \
 	(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
 	 UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \
-	 UBLK_PARAM_TYPE_DMA_ALIGN)
+	 UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
 
 struct ublk_rq_data {
 	struct kref ref;
@@ -580,6 +580,13 @@  static int ublk_validate_params(const struct ublk_device *ub)
 			return -EINVAL;
 	}
 
+	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
+		const struct ublk_param_segment *p = &ub->params.seg;
+
+		if (!is_power_of_2(p->seg_boundary_mask + 1))
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -2350,6 +2357,12 @@  static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 	if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
 		lim.dma_alignment = ub->params.dma.alignment;
 
+	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) {
+		lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask;
+		lim.max_segment_size = ub->params.seg.max_segment_size;
+		lim.max_segments = ub->params.seg.max_segments;
+	}
+
 	if (wait_for_completion_interruptible(&ub->completion) != 0)
 		return -EINTR;
 
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 7255b36b5cf6..83c2b94251f0 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -410,6 +410,13 @@  struct ublk_param_dma_align {
 	__u8	pad[4];
 };
 
+struct ublk_param_segment {
+	__u64 	seg_boundary_mask;
+	__u32 	max_segment_size;
+	__u16 	max_segments;
+	__u8	pad[2];
+};
+
 struct ublk_params {
 	/*
 	 * Total length of parameters, userspace has to set 'len' for both
@@ -423,6 +430,7 @@  struct ublk_params {
 #define UBLK_PARAM_TYPE_DEVT            (1 << 2)
 #define UBLK_PARAM_TYPE_ZONED           (1 << 3)
 #define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4)
+#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)
 	__u32	types;			/* types of parameter included */
 
 	struct ublk_param_basic		basic;
@@ -430,6 +438,7 @@  struct ublk_params {
 	struct ublk_param_devt		devt;
 	struct ublk_param_zoned	zoned;
 	struct ublk_param_dma_align	dma;
+	struct ublk_param_segment	seg;
 };
 
 #endif