diff mbox series

[v5,3/8] dmaengine: Add API function dmaengine_prep_slave_dma_vec()

Message ID 20231219175009.65482-4-paul@crapouillou.net (mailing list archive)
State Changes Requested
Headers show
Series iio: new DMABUF based API, v5 | expand

Commit Message

Paul Cercueil Dec. 19, 2023, 5:50 p.m. UTC
This function can be used to initiate a scatter-gather DMA transfer,
where the address and size of each segment are located in one entry of
the dma_vec array.

The major difference with dmaengine_prep_slave_sg() is that it supports
specifying the length of each DMA transfer, whereas trying to override the
length of the transfer with dmaengine_prep_slave_sg() is a very tedious
process. The introduction of a new API function is also justified by the
fact that scatterlists are on their way out.

Note that dmaengine_prep_interleaved_dma() is not helpful either in that
case, as it assumes that the address of each segment will be higher than
the one of the previous segment, which we just cannot guarantee in case
of a scatter-gather transfer.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>

---
v3: New patch

v5: Replace with function dmaengine_prep_slave_dma_vec(), and struct
    'dma_vec'.
    Note that at some point we will need to support cyclic transfers
    using dmaengine_prep_slave_dma_vec(). Maybe with a new "flags"
    parameter to the function?
---
 include/linux/dmaengine.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Comments

Jonathan Cameron Dec. 21, 2023, 11:40 a.m. UTC | #1
On Tue, 19 Dec 2023 18:50:04 +0100
Paul Cercueil <paul@crapouillou.net> wrote:

> This function can be used to initiate a scatter-gather DMA transfer,
> where the address and size of each segment is located in one entry of
> the dma_vec array.
> 
> The major difference with dmaengine_prep_slave_sg() is that it supports
> specifying the lengths of each DMA transfer; as trying to override the
> length of the transfer with dmaengine_prep_slave_sg() is a very tedious
> process. The introduction of a new API function is also justified by the
> fact that scatterlists are on their way out.
> 
> Note that dmaengine_prep_interleaved_dma() is not helpful either in that
> case, as it assumes that the address of each segment will be higher than
> the one of the previous segment, which we just cannot guarantee in case
> of a scatter-gather transfer.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>

This and the next patch look fine to me, as they clearly simplify things
for our use cases, but they are really something for the dmaengine
maintainers to comment on.

Jonathan
Vinod Koul Dec. 21, 2023, 3:14 p.m. UTC | #2
On 19-12-23, 18:50, Paul Cercueil wrote:
> This function can be used to initiate a scatter-gather DMA transfer,
> where the address and size of each segment is located in one entry of
> the dma_vec array.
> 
> The major difference with dmaengine_prep_slave_sg() is that it supports
> specifying the lengths of each DMA transfer; as trying to override the
> length of the transfer with dmaengine_prep_slave_sg() is a very tedious
> process. The introduction of a new API function is also justified by the
> fact that scatterlists are on their way out.
> 
> Note that dmaengine_prep_interleaved_dma() is not helpful either in that
> case, as it assumes that the address of each segment will be higher than
> the one of the previous segment, which we just cannot guarantee in case
> of a scatter-gather transfer.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> 
> ---
> v3: New patch
> 
> v5: Replace with function dmaengine_prep_slave_dma_vec(), and struct
>     'dma_vec'.
>     Note that at some point we will need to support cyclic transfers
>     using dmaengine_prep_slave_dma_vec(). Maybe with a new "flags"
>     parameter to the function?
> ---
>  include/linux/dmaengine.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 3df70d6131c8..ee5931ddb42f 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -160,6 +160,16 @@ struct dma_interleaved_template {
>  	struct data_chunk sgl[];
>  };
>  
> +/**
> + * struct dma_vec - DMA vector
> + * @addr: Bus address of the start of the vector
> + * @len: Length in bytes of the DMA vector
> + */
> +struct dma_vec {
> +	dma_addr_t addr;
> +	size_t len;
> +};

So you want to transfer multiple buffers, right? Why not use
dmaengine_prep_slave_sg()? Is there a reason for not using that one?

Furthermore, I missed replying to your earlier email on the use of
dmaengine_prep_interleaved_dma(), my apologies.
That can be made to work for you as well. Please see the notes where icg
can be ignored; the icg value does not need to be set.

In fact, the interleaved API can be made to work in most of the cases I
can think of...


> +
>  /**
>   * enum dma_ctrl_flags - DMA flags to augment operation preparation,
>   *  control completion, and communicate status.
> @@ -910,6 +920,10 @@ struct dma_device {
>  	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
>  		struct dma_chan *chan, unsigned long flags);
>  
> +	struct dma_async_tx_descriptor *(*device_prep_slave_dma_vec)(
> +		struct dma_chan *chan, const struct dma_vec *vecs,
> +		size_t nents, enum dma_transfer_direction direction,
> +		unsigned long flags);
>  	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
>  		struct dma_chan *chan, struct scatterlist *sgl,
>  		unsigned int sg_len, enum dma_transfer_direction direction,
> @@ -972,6 +986,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
>  						  dir, flags, NULL);
>  }
>  
> +static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_dma_vec(
> +	struct dma_chan *chan, const struct dma_vec *vecs, size_t nents,
> +	enum dma_transfer_direction dir, unsigned long flags)
> +{
> +	if (!chan || !chan->device || !chan->device->device_prep_slave_dma_vec)
> +		return NULL;
> +
> +	return chan->device->device_prep_slave_dma_vec(chan, vecs, nents,
> +						       dir, flags);
> +}
> +
>  static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
>  	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
>  	enum dma_transfer_direction dir, unsigned long flags)
> -- 
> 2.43.0
Paul Cercueil Dec. 21, 2023, 3:29 p.m. UTC | #3
Hi Vinod,

Le jeudi 21 décembre 2023 à 20:44 +0530, Vinod Koul a écrit :
> On 19-12-23, 18:50, Paul Cercueil wrote:
> > This function can be used to initiate a scatter-gather DMA
> > transfer,
> > where the address and size of each segment is located in one entry
> > of
> > the dma_vec array.
> > 
> > The major difference with dmaengine_prep_slave_sg() is that it
> > supports
> > specifying the lengths of each DMA transfer; as trying to override
> > the
> > length of the transfer with dmaengine_prep_slave_sg() is a very
> > tedious
> > process. The introduction of a new API function is also justified
> > by the
> > fact that scatterlists are on their way out.
> > 
> > Note that dmaengine_prep_interleaved_dma() is not helpful either in
> > that
> > case, as it assumes that the address of each segment will be higher
> > than
> > the one of the previous segment, which we just cannot guarantee in
> > case
> > of a scatter-gather transfer.
> > 
> > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > 
> > ---
> > v3: New patch
> > 
> > v5: Replace with function dmaengine_prep_slave_dma_vec(), and
> > struct
> >     'dma_vec'.
> >     Note that at some point we will need to support cyclic
> > transfers
> >     using dmaengine_prep_slave_dma_vec(). Maybe with a new "flags"
> >     parameter to the function?
> > ---
> >  include/linux/dmaengine.h | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> > 
> > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > index 3df70d6131c8..ee5931ddb42f 100644
> > --- a/include/linux/dmaengine.h
> > +++ b/include/linux/dmaengine.h
> > @@ -160,6 +160,16 @@ struct dma_interleaved_template {
> >  	struct data_chunk sgl[];
> >  };
> >  
> > +/**
> > + * struct dma_vec - DMA vector
> > + * @addr: Bus address of the start of the vector
> > + * @len: Length in bytes of the DMA vector
> > + */
> > +struct dma_vec {
> > +	dma_addr_t addr;
> > +	size_t len;
> > +};
> 
> so you want to transfer multiple buffers, right? why not use
> dmaengine_prep_slave_sg(). If there is reason for not using that one?

Well I think I answer that in the commit message, don't I?

> Furthermore I missed replying to your email earlier on use of
> dmaengine_prep_interleaved_dma(), my apologies.
> That can be made to work for you as well. Please see the notes where
> icg
> can be ignored and it does not need icg value to be set
> 
> Infact, interleaved api can be made to work in most of these cases I
> can
> think of...

So if I want to transfer 16 bytes from 0x10, then 16 bytes from 0x0,
then 16 bytes from 0x20, how should I configure the
dma_interleaved_template?

Cheers,
-Paul

> > +
> >  /**
> >   * enum dma_ctrl_flags - DMA flags to augment operation
> > preparation,
> >   *  control completion, and communicate status.
> > @@ -910,6 +920,10 @@ struct dma_device {
> >  	struct dma_async_tx_descriptor
> > *(*device_prep_dma_interrupt)(
> >  		struct dma_chan *chan, unsigned long flags);
> >  
> > +	struct dma_async_tx_descriptor
> > *(*device_prep_slave_dma_vec)(
> > +		struct dma_chan *chan, const struct dma_vec *vecs,
> > +		size_t nents, enum dma_transfer_direction
> > direction,
> > +		unsigned long flags);
> >  	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
> >  		struct dma_chan *chan, struct scatterlist *sgl,
> >  		unsigned int sg_len, enum dma_transfer_direction
> > direction,
> > @@ -972,6 +986,17 @@ static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_single(
> >  						  dir, flags,
> > NULL);
> >  }
> >  
> > +static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_dma_vec(
> > +	struct dma_chan *chan, const struct dma_vec *vecs, size_t
> > nents,
> > +	enum dma_transfer_direction dir, unsigned long flags)
> > +{
> > +	if (!chan || !chan->device || !chan->device-
> > >device_prep_slave_dma_vec)
> > +		return NULL;
> > +
> > +	return chan->device->device_prep_slave_dma_vec(chan, vecs,
> > nents,
> > +						       dir,
> > flags);
> > +}
> > +
> >  static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_sg(
> >  	struct dma_chan *chan, struct scatterlist
> > *sgl,	unsigned int sg_len,
> >  	enum dma_transfer_direction dir, unsigned long flags)
> > -- 
> > 2.43.0
>
Paul Cercueil Jan. 8, 2024, 12:20 p.m. UTC | #4
Hi Vinod,

Le jeudi 21 décembre 2023 à 20:44 +0530, Vinod Koul a écrit :
> On 19-12-23, 18:50, Paul Cercueil wrote:
> > This function can be used to initiate a scatter-gather DMA
> > transfer,
> > where the address and size of each segment is located in one entry
> > of
> > the dma_vec array.
> > 
> > The major difference with dmaengine_prep_slave_sg() is that it
> > supports
> > specifying the lengths of each DMA transfer; as trying to override
> > the
> > length of the transfer with dmaengine_prep_slave_sg() is a very
> > tedious
> > process. The introduction of a new API function is also justified
> > by the
> > fact that scatterlists are on their way out.
> > 
> > Note that dmaengine_prep_interleaved_dma() is not helpful either in
> > that
> > case, as it assumes that the address of each segment will be higher
> > than
> > the one of the previous segment, which we just cannot guarantee in
> > case
> > of a scatter-gather transfer.
> > 
> > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > 
> > ---
> > v3: New patch
> > 
> > v5: Replace with function dmaengine_prep_slave_dma_vec(), and
> > struct
> >     'dma_vec'.
> >     Note that at some point we will need to support cyclic
> > transfers
> >     using dmaengine_prep_slave_dma_vec(). Maybe with a new "flags"
> >     parameter to the function?
> > ---
> >  include/linux/dmaengine.h | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> > 
> > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > index 3df70d6131c8..ee5931ddb42f 100644
> > --- a/include/linux/dmaengine.h
> > +++ b/include/linux/dmaengine.h
> > @@ -160,6 +160,16 @@ struct dma_interleaved_template {
> >  	struct data_chunk sgl[];
> >  };
> >  
> > +/**
> > + * struct dma_vec - DMA vector
> > + * @addr: Bus address of the start of the vector
> > + * @len: Length in bytes of the DMA vector
> > + */
> > +struct dma_vec {
> > +	dma_addr_t addr;
> > +	size_t len;
> > +};

I don't want to be pushy, but I'd like to know how to solve this now,
otherwise I'll just send the same patches for my v6.

> so you want to transfer multiple buffers, right? why not use
> dmaengine_prep_slave_sg(). If there is reason for not using that one?

The reason is that we want to have the possibility to transfer less
than the total size of the scatterlist, and that's currently very hard
to do - scatterlists were designed to not be tampered with.

Christian König then suggested to introduce a "dma_vec" which had been
on his TODO list for a while now.

> Furthermore I missed replying to your email earlier on use of
> dmaengine_prep_interleaved_dma(), my apologies.
> That can be made to work for you as well. Please see the notes where
> icg
> can be ignored and it does not need icg value to be set
> 
> Infact, interleaved api can be made to work in most of these cases I
> can
> think of...

Interleaved API only supports incrementing addresses, I see no way to
decrement the address (without using crude hacks e.g. overflowing
size_t). I can't guarantee that my DMABUF's pages are ordered in
memory.

Cheers,
-Paul

> > +
> >  /**
> >   * enum dma_ctrl_flags - DMA flags to augment operation
> > preparation,
> >   *  control completion, and communicate status.
> > @@ -910,6 +920,10 @@ struct dma_device {
> >  	struct dma_async_tx_descriptor
> > *(*device_prep_dma_interrupt)(
> >  		struct dma_chan *chan, unsigned long flags);
> >  
> > +	struct dma_async_tx_descriptor
> > *(*device_prep_slave_dma_vec)(
> > +		struct dma_chan *chan, const struct dma_vec *vecs,
> > +		size_t nents, enum dma_transfer_direction
> > direction,
> > +		unsigned long flags);
> >  	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
> >  		struct dma_chan *chan, struct scatterlist *sgl,
> >  		unsigned int sg_len, enum dma_transfer_direction
> > direction,
> > @@ -972,6 +986,17 @@ static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_single(
> >  						  dir, flags,
> > NULL);
> >  }
> >  
> > +static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_dma_vec(
> > +	struct dma_chan *chan, const struct dma_vec *vecs, size_t
> > nents,
> > +	enum dma_transfer_direction dir, unsigned long flags)
> > +{
> > +	if (!chan || !chan->device || !chan->device-
> > >device_prep_slave_dma_vec)
> > +		return NULL;
> > +
> > +	return chan->device->device_prep_slave_dma_vec(chan, vecs,
> > nents,
> > +						       dir,
> > flags);
> > +}
> > +
> >  static inline struct dma_async_tx_descriptor
> > *dmaengine_prep_slave_sg(
> >  	struct dma_chan *chan, struct scatterlist
> > *sgl,	unsigned int sg_len,
> >  	enum dma_transfer_direction dir, unsigned long flags)
> > -- 
> > 2.43.0
>
Vinod Koul Jan. 22, 2024, 11:06 a.m. UTC | #5
Hi Paul,


On 08-01-24, 13:20, Paul Cercueil wrote:
> Hi Vinod,
> 
> Le jeudi 21 décembre 2023 à 20:44 +0530, Vinod Koul a écrit :
> > On 19-12-23, 18:50, Paul Cercueil wrote:
> > > This function can be used to initiate a scatter-gather DMA
> > > transfer,
> > > where the address and size of each segment is located in one entry
> > > of
> > > the dma_vec array.
> > > 
> > > The major difference with dmaengine_prep_slave_sg() is that it
> > > supports
> > > specifying the lengths of each DMA transfer; as trying to override
> > > the
> > > length of the transfer with dmaengine_prep_slave_sg() is a very
> > > tedious
> > > process. The introduction of a new API function is also justified
> > > by the
> > > fact that scatterlists are on their way out.
> > > 
> > > Note that dmaengine_prep_interleaved_dma() is not helpful either in
> > > that
> > > case, as it assumes that the address of each segment will be higher
> > > than
> > > the one of the previous segment, which we just cannot guarantee in
> > > case
> > > of a scatter-gather transfer.
> > > 
> > > Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> > > 
> > > ---
> > > v3: New patch
> > > 
> > > v5: Replace with function dmaengine_prep_slave_dma_vec(), and
> > > struct
> > >     'dma_vec'.
> > >     Note that at some point we will need to support cyclic
> > > transfers
> > >     using dmaengine_prep_slave_dma_vec(). Maybe with a new "flags"
> > >     parameter to the function?
> > > ---
> > >  include/linux/dmaengine.h | 25 +++++++++++++++++++++++++
> > >  1 file changed, 25 insertions(+)
> > > 
> > > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > > index 3df70d6131c8..ee5931ddb42f 100644
> > > --- a/include/linux/dmaengine.h
> > > +++ b/include/linux/dmaengine.h
> > > @@ -160,6 +160,16 @@ struct dma_interleaved_template {
> > >  	struct data_chunk sgl[];
> > >  };
> > >  
> > > +/**
> > > + * struct dma_vec - DMA vector
> > > + * @addr: Bus address of the start of the vector
> > > + * @len: Length in bytes of the DMA vector
> > > + */
> > > +struct dma_vec {
> > > +	dma_addr_t addr;
> > > +	size_t len;
> > > +};
> 
> I don't want to be pushy, but I'd like to know how to solve this now,
> otherwise I'll just send the same patches for my v6.
> 
> > so you want to transfer multiple buffers, right? why not use
> > dmaengine_prep_slave_sg(). If there is reason for not using that one?
> 
> The reason is that we want to have the possibility to transfer less
> than the total size of the scatterlist, and that's currently very hard
> to do - scatterlists were designed to not be tampered with.
> 
> Christian König then suggested to introduce a "dma_vec" which had been
> on his TODO list for a while now.

Yeah, for this the interleaved API seems overkill. Let's go with this API.
I would suggest changing the name of the API, replacing "slave" with
"peripheral", though.
diff mbox series

Patch

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3df70d6131c8..ee5931ddb42f 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -160,6 +160,16 @@  struct dma_interleaved_template {
 	struct data_chunk sgl[];
 };
 
+/**
+ * struct dma_vec - DMA vector
+ * @addr: Bus address of the start of the vector
+ * @len: Length in bytes of the DMA vector
+ */
+struct dma_vec {
+	dma_addr_t addr;
+	size_t len;
+};
+
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
@@ -910,6 +920,10 @@  struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
 
+	struct dma_async_tx_descriptor *(*device_prep_slave_dma_vec)(
+		struct dma_chan *chan, const struct dma_vec *vecs,
+		size_t nents, enum dma_transfer_direction direction,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -972,6 +986,17 @@  static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 						  dir, flags, NULL);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_dma_vec(
+	struct dma_chan *chan, const struct dma_vec *vecs, size_t nents,
+	enum dma_transfer_direction dir, unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_slave_dma_vec)
+		return NULL;
+
+	return chan->device->device_prep_slave_dma_vec(chan, vecs, nents,
+						       dir, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
 	enum dma_transfer_direction dir, unsigned long flags)