diff mbox series

[RFC,1/6] ALSA: compress: add Sample Rate Converter codec support

Message ID 1722940003-20126-2-git-send-email-shengjiu.wang@nxp.com (mailing list archive)
State Superseded
Headers show
Series ASoC: fsl: add memory to memory function for ASRC | expand

Commit Message

Shengjiu Wang Aug. 6, 2024, 10:26 a.m. UTC
Add Sample Rate Converter(SRC) codec support, define the output
format and rate for SRC.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 include/uapi/sound/compress_offload.h | 2 ++
 include/uapi/sound/compress_params.h  | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

Comments

Pierre-Louis Bossart Aug. 6, 2024, 11:16 a.m. UTC | #1
On 8/6/24 12:26, Shengjiu Wang wrote:
> Add Sample Rate Converter(SRC) codec support, define the output
> format and rate for SRC.
> 
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> ---
>  include/uapi/sound/compress_offload.h | 2 ++
>  include/uapi/sound/compress_params.h  | 9 ++++++++-
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
> index 98772b0cbcb7..8b2b72f94e26 100644
> --- a/include/uapi/sound/compress_offload.h
> +++ b/include/uapi/sound/compress_offload.h
> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>   * end of the track
>   * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
>   * beginning of the track
> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
>   */
>  enum sndrv_compress_encoder {
>  	SNDRV_COMPRESS_ENCODER_PADDING = 1,
>  	SNDRV_COMPRESS_ENCODER_DELAY = 2,
> +	SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>  };

this sounds wrong to me. The sample rate converter is not an "encoder",
and the properties for padding/delay are totally specific to an encoder
function.

The other point is that I am not sure how standard this ratio_mod
parameter is. This could be totally specific to a specific
implementation, and another ASRC might have different parameters.

>  
>  /**
> diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
> index ddc77322d571..0843773ea6b4 100644
> --- a/include/uapi/sound/compress_params.h
> +++ b/include/uapi/sound/compress_params.h
> @@ -43,7 +43,8 @@
>  #define SND_AUDIOCODEC_BESPOKE               ((__u32) 0x0000000E)
>  #define SND_AUDIOCODEC_ALAC                  ((__u32) 0x0000000F)
>  #define SND_AUDIOCODEC_APE                   ((__u32) 0x00000010)
> -#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_APE
> +#define SND_AUDIOCODEC_SRC                   ((__u32) 0x00000011)
> +#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_SRC

I am not sure this is wise to change such definitions?
>  
>  /*
>   * Profile and modes are listed with bit masks. This allows for a
> @@ -324,6 +325,11 @@ struct snd_dec_ape {
>  	__u32 seek_table_present;
>  } __attribute__((packed, aligned(4)));
>  
> +struct snd_dec_src {
> +	__u32 format_out;
> +	__u32 rate_out;
> +} __attribute__((packed, aligned(4)));

Again I am not sure how standard those parameters are, and even if they
were if their representation is reusable.

And the compressed API has a good split between encoders and decoders, I
am not sure how an SRC can be classified as either.

>  union snd_codec_options {
>  	struct snd_enc_wma wma;
>  	struct snd_enc_vorbis vorbis;
> @@ -334,6 +340,7 @@ union snd_codec_options {
>  	struct snd_dec_wma wma_d;
>  	struct snd_dec_alac alac_d;
>  	struct snd_dec_ape ape_d;
> +	struct snd_dec_src src;
>  } __attribute__((packed, aligned(4)));
>  
>  /** struct snd_codec_desc - description of codec capabilities
Jeff Brower Aug. 6, 2024, 11:39 a.m. UTC | #2
All-

"The sample rate converter is not an encoder ..."

Indeed, an encoder creates a compressed bitstream from audio data  
(typically linear PCM samples), normally for transmission of some  
type. A sample rate converter generates audio data from audio data,  
and is normally applied prior to an encoder because it can only accept  
a limited range of sample rates.

-Jeff

Quoting Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>:

> On 8/6/24 12:26, Shengjiu Wang wrote:
>> Add Sample Rate Converter(SRC) codec support, define the output
>> format and rate for SRC.
>>
>> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
>> ---
>>  include/uapi/sound/compress_offload.h | 2 ++
>>  include/uapi/sound/compress_params.h  | 9 ++++++++-
>>  2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/sound/compress_offload.h  
>> b/include/uapi/sound/compress_offload.h
>> index 98772b0cbcb7..8b2b72f94e26 100644
>> --- a/include/uapi/sound/compress_offload.h
>> +++ b/include/uapi/sound/compress_offload.h
>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>>   * end of the track
>>   * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the  
>> encoder at the
>>   * beginning of the track
>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for  
>> sample rate converter
>>   */
>>  enum sndrv_compress_encoder {
>>  	SNDRV_COMPRESS_ENCODER_PADDING = 1,
>>  	SNDRV_COMPRESS_ENCODER_DELAY = 2,
>> +	SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>>  };
>
> this sounds wrong to me. The sample rate converter is not an "encoder",
> and the properties for padding/delay are totally specific to an encoder
> function.
>
> The other point is that I am not sure how standard this ratio_mod
> parameter is. This could be totally specific to a specific
> implementation, and another ASRC might have different parameters.
>
>>
>>  /**
>> diff --git a/include/uapi/sound/compress_params.h  
>> b/include/uapi/sound/compress_params.h
>> index ddc77322d571..0843773ea6b4 100644
>> --- a/include/uapi/sound/compress_params.h
>> +++ b/include/uapi/sound/compress_params.h
>> @@ -43,7 +43,8 @@
>>  #define SND_AUDIOCODEC_BESPOKE               ((__u32) 0x0000000E)
>>  #define SND_AUDIOCODEC_ALAC                  ((__u32) 0x0000000F)
>>  #define SND_AUDIOCODEC_APE                   ((__u32) 0x00000010)
>> -#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_APE
>> +#define SND_AUDIOCODEC_SRC                   ((__u32) 0x00000011)
>> +#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_SRC
>
> I am not sure this is wise to change such definitions?
>>
>>  /*
>>   * Profile and modes are listed with bit masks. This allows for a
>> @@ -324,6 +325,11 @@ struct snd_dec_ape {
>>  	__u32 seek_table_present;
>>  } __attribute__((packed, aligned(4)));
>>
>> +struct snd_dec_src {
>> +	__u32 format_out;
>> +	__u32 rate_out;
>> +} __attribute__((packed, aligned(4)));
>
> Again I am not sure how standard those parameters are, and even if they
> were if their representation is reusable.
>
> And the compressed API has a good split between encoders and decoders, I
> am not sure how an SRC can be classified as either.
>
>>  union snd_codec_options {
>>  	struct snd_enc_wma wma;
>>  	struct snd_enc_vorbis vorbis;
>> @@ -334,6 +340,7 @@ union snd_codec_options {
>>  	struct snd_dec_wma wma_d;
>>  	struct snd_dec_alac alac_d;
>>  	struct snd_dec_ape ape_d;
>> +	struct snd_dec_src src;
>>  } __attribute__((packed, aligned(4)));
>>
>>  /** struct snd_codec_desc - description of codec capabilities
Shengjiu Wang Aug. 8, 2024, 9:17 a.m. UTC | #3
On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
>
> On 8/6/24 12:26, Shengjiu Wang wrote:
> > Add Sample Rate Converter(SRC) codec support, define the output
> > format and rate for SRC.
> >
> > Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> > ---
> >  include/uapi/sound/compress_offload.h | 2 ++
> >  include/uapi/sound/compress_params.h  | 9 ++++++++-
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
> > index 98772b0cbcb7..8b2b72f94e26 100644
> > --- a/include/uapi/sound/compress_offload.h
> > +++ b/include/uapi/sound/compress_offload.h
> > @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
> >   * end of the track
> >   * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
> >   * beginning of the track
> > + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
> >   */
> >  enum sndrv_compress_encoder {
> >       SNDRV_COMPRESS_ENCODER_PADDING = 1,
> >       SNDRV_COMPRESS_ENCODER_DELAY = 2,
> > +     SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
> >  };
>
> this sounds wrong to me. The sample rate converter is not an "encoder",
> and the properties for padding/delay are totally specific to an encoder
> function.

There is only decoder and encoder definition for compress,  I know
it is difficult to add SRC to encoder or decoder classification,
SRC is a Post Processing.  I hope you can have a recommendation.

Thanks.

Best regards
Shengjiu Wang

>
> The other point is that I am not sure how standard this ratio_mod
> parameter is. This could be totally specific to a specific
> implementation, and another ASRC might have different parameters.
>
> >
> >  /**
> > diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
> > index ddc77322d571..0843773ea6b4 100644
> > --- a/include/uapi/sound/compress_params.h
> > +++ b/include/uapi/sound/compress_params.h
> > @@ -43,7 +43,8 @@
> >  #define SND_AUDIOCODEC_BESPOKE               ((__u32) 0x0000000E)
> >  #define SND_AUDIOCODEC_ALAC                  ((__u32) 0x0000000F)
> >  #define SND_AUDIOCODEC_APE                   ((__u32) 0x00000010)
> > -#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_APE
> > +#define SND_AUDIOCODEC_SRC                   ((__u32) 0x00000011)
> > +#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_SRC
>
> I am not sure this is wise to change such definitions?
> >
> >  /*
> >   * Profile and modes are listed with bit masks. This allows for a
> > @@ -324,6 +325,11 @@ struct snd_dec_ape {
> >       __u32 seek_table_present;
> >  } __attribute__((packed, aligned(4)));
> >
> > +struct snd_dec_src {
> > +     __u32 format_out;
> > +     __u32 rate_out;
> > +} __attribute__((packed, aligned(4)));
>
> Again I am not sure how standard those parameters are, and even if they
> were if their representation is reusable.
>
> And the compressed API has a good split between encoders and decoders, I
> am not sure how an SRC can be classified as either.
>
> >  union snd_codec_options {
> >       struct snd_enc_wma wma;
> >       struct snd_enc_vorbis vorbis;
> > @@ -334,6 +340,7 @@ union snd_codec_options {
> >       struct snd_dec_wma wma_d;
> >       struct snd_dec_alac alac_d;
> >       struct snd_dec_ape ape_d;
> > +     struct snd_dec_src src;
> >  } __attribute__((packed, aligned(4)));
> >
> >  /** struct snd_codec_desc - description of codec capabilities
>
Pierre-Louis Bossart Aug. 8, 2024, 11:27 a.m. UTC | #4
On 8/8/24 11:17, Shengjiu Wang wrote:
> On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart
> <pierre-louis.bossart@linux.intel.com> wrote:
>>
>>
>>
>> On 8/6/24 12:26, Shengjiu Wang wrote:
>>> Add Sample Rate Converter(SRC) codec support, define the output
>>> format and rate for SRC.
>>>
>>> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
>>> ---
>>>  include/uapi/sound/compress_offload.h | 2 ++
>>>  include/uapi/sound/compress_params.h  | 9 ++++++++-
>>>  2 files changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
>>> index 98772b0cbcb7..8b2b72f94e26 100644
>>> --- a/include/uapi/sound/compress_offload.h
>>> +++ b/include/uapi/sound/compress_offload.h
>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>>>   * end of the track
>>>   * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
>>>   * beginning of the track
>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
>>>   */
>>>  enum sndrv_compress_encoder {
>>>       SNDRV_COMPRESS_ENCODER_PADDING = 1,
>>>       SNDRV_COMPRESS_ENCODER_DELAY = 2,
>>> +     SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>>>  };
>>
>> this sounds wrong to me. The sample rate converter is not an "encoder",
>> and the properties for padding/delay are totally specific to an encoder
>> function.
> 
> There is only decoder and encoder definition for compress,  I know
> it is difficult to add SRC to encoder or decoder classification,
> SRC is a Post Processing.  I hope you can have a recommendation

I don't. I think we're blurring layers in a really odd way.

The main reason why the compress API was added is to remove the
byte-to-time conversions. But that's clearly not useful for a
post-processing of PCM data, where the bitrate is constant. It really
feels like we're adding this memory-to-memory API to the compress
framework because we don't have anything else available, not because it
makes sense to do so.

Then there's the issue of parameters, we chose to only add parameters
for standard encoders/decoders. Post-processing is highly specific and
the parameter definitions varies from one implementation to another -
and usually parameters are handled in an opaque way with binary
controls. This is best handled with a UUID that needs to be known only
to applications and low-level firmware/hardware, the kernel code should
not have to be modified for each and every processing and to add new
parameters. It just does not scale and it's unmaintainable.

At the very least if you really want to use this compress API, extend it
to use a non-descript "UUID-defined" type and an opaque set of
parameters with this UUID passed in a header.
Jaroslav Kysela Aug. 8, 2024, 12:02 p.m. UTC | #5
On 08. 08. 24 13:27, Pierre-Louis Bossart wrote:
> 
> 
> On 8/8/24 11:17, Shengjiu Wang wrote:
>> On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart
>> <pierre-louis.bossart@linux.intel.com> wrote:
>>>
>>>
>>>
>>> On 8/6/24 12:26, Shengjiu Wang wrote:
>>>> Add Sample Rate Converter(SRC) codec support, define the output
>>>> format and rate for SRC.
>>>>
>>>> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
>>>> ---
>>>>   include/uapi/sound/compress_offload.h | 2 ++
>>>>   include/uapi/sound/compress_params.h  | 9 ++++++++-
>>>>   2 files changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
>>>> index 98772b0cbcb7..8b2b72f94e26 100644
>>>> --- a/include/uapi/sound/compress_offload.h
>>>> +++ b/include/uapi/sound/compress_offload.h
>>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>>>>    * end of the track
>>>>    * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
>>>>    * beginning of the track
>>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
>>>>    */
>>>>   enum sndrv_compress_encoder {
>>>>        SNDRV_COMPRESS_ENCODER_PADDING = 1,
>>>>        SNDRV_COMPRESS_ENCODER_DELAY = 2,
>>>> +     SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>>>>   };
>>>
>>> this sounds wrong to me. The sample rate converter is not an "encoder",
>>> and the properties for padding/delay are totally specific to an encoder
>>> function.
>>
>> There is only decoder and encoder definition for compress,  I know
>> it is difficult to add SRC to encoder or decoder classification,
>> SRC is a Post Processing.  I hope you can have a recommendation
> 
> I don't. I think we're blurring layers in a really odd way.
> 
> The main reason why the compress API was added is to remove the
> byte-to-time conversions. But that's clearly not useful for a
> post-processing of PCM data, where the bitrate is constant. It really
> feels like we're adding this memory-to-memory API to the compress
> framework because we don't have anything else available, not because it
> makes sense to do so.

It makes sense to offload decoder/encoder tasks as batch processing for 
standard compress stream and return back result (PCM stream or encoded stream) 
to user space. So it makes sense to use the compress interface (parameters 
handshake) for it. Let's talk about the proper SRC extension.

For SRC and dynamic rate modification. I would just create an ALSA control for 
this. We are already using the "PCM Rate Shift 100000" control in the 
sound/drivers/aloop.c for this purpose (something like pitch in MIDI) for 
example. There is no requirement to add this function through metadata ioctls. 
As bonus, this control can be monitored with multiple tasks.

> Then there's the issue of parameters, we chose to only add parameters
> for standard encoders/decoders. Post-processing is highly specific and
> the parameter definitions varies from one implementation to another -
> and usually parameters are handled in an opaque way with binary
> controls. This is best handled with a UUID that needs to be known only
> to applications and low-level firmware/hardware, the kernel code should
> not have to be modified for each and every processing and to add new
> parameters. It just does not scale and it's unmaintainable.
> 
> At the very least if you really want to use this compress API, extend it
> to use a non-descript "UUID-defined" type and an opaque set of
> parameters with this UUID passed in a header.

We don't need to use UUID-defined scheme for simple (A)SRC implementation. As 
I noted, the specific runtime controls may use existing ALSA control API.

					Jaroslav
Pierre-Louis Bossart Aug. 8, 2024, 12:19 p.m. UTC | #6
files changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/
>>>>> sound/compress_offload.h
>>>>> index 98772b0cbcb7..8b2b72f94e26 100644
>>>>> --- a/include/uapi/sound/compress_offload.h
>>>>> +++ b/include/uapi/sound/compress_offload.h
>>>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>>>>>    * end of the track
>>>>>    * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the
>>>>> encoder at the
>>>>>    * beginning of the track
>>>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for
>>>>> sample rate converter
>>>>>    */
>>>>>   enum sndrv_compress_encoder {
>>>>>        SNDRV_COMPRESS_ENCODER_PADDING = 1,
>>>>>        SNDRV_COMPRESS_ENCODER_DELAY = 2,
>>>>> +     SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>>>>>   };
>>>>
>>>> this sounds wrong to me. The sample rate converter is not an "encoder",
>>>> and the properties for padding/delay are totally specific to an encoder
>>>> function.
>>>
>>> There is only decoder and encoder definition for compress,  I know
>>> it is difficult to add SRC to encoder or decoder classification,
>>> SRC is a Post Processing.  I hope you can have a recommendation
>>
>> I don't. I think we're blurring layers in a really odd way.
>>
>> The main reason why the compress API was added is to remove the
>> byte-to-time conversions. But that's clearly not useful for a
>> post-processing of PCM data, where the bitrate is constant. It really
>> feels like we're adding this memory-to-memory API to the compress
>> framework because we don't have anything else available, not because it
>> makes sense to do so.
> 
> It makes sense to offload decoder/encoder tasks as batch processing for
> standard compress stream and return back result (PCM stream or encoded
> stream) to user space. So it makes sense to use the compress interface
> (parameters handshake) for it. Let's talk about the proper SRC extension.
> 
> For SRC and dynamic rate modification. I would just create an ALSA
> control for this. We are already using the "PCM Rate Shift 100000"
> control in the sound/drivers/aloop.c for this purpose (something like
> pitch in MIDI) for example. There is no requirement to add this function
> through metadata ioctls. As bonus, this control can be monitored with
> multiple tasks.

this wouldn't work when the rate is estimated in firmware/hardware,
which is precisely what the 'asynchronous' part of ASRC does.


>> Then there's the issue of parameters, we chose to only add parameters
>> for standard encoders/decoders. Post-processing is highly specific and
>> the parameter definitions varies from one implementation to another -
>> and usually parameters are handled in an opaque way with binary
>> controls. This is best handled with a UUID that needs to be known only
>> to applications and low-level firmware/hardware, the kernel code should
>> not have to be modified for each and every processing and to add new
>> parameters. It just does not scale and it's unmaintainable.
>>
>> At the very least if you really want to use this compress API, extend it
>> to use a non-descript "UUID-defined" type and an opaque set of
>> parameters with this UUID passed in a header.
> 
> We don't need to use UUID-defined scheme for simple (A)SRC
> implementation. As I noted, the specific runtime controls may use
> existing ALSA control API.

"Simple (A)SRC" is an oxymoron. There are multiple ways to define the
performance, and how the drift estimator is handled. There's nothing
simple if you look under the hood. The SOF implementation has for
example those parameters:

uint32_t source_rate;           /**< Define fixed source rate or */
				/**< use 0 to indicate need to get */
				/**< the rate from stream */
uint32_t sink_rate;             /**< Define fixed sink rate or */
				/**< use 0 to indicate need to get */
				/**< the rate from stream */
uint32_t asynchronous_mode;     /**< synchronous 0, asynchronous 1 */
				/**< When 1 the ASRC tracks and */
				/**< compensates for drift. */
uint32_t operation_mode;        /**< push 0, pull 1, In push mode the */
				/**< ASRC consumes a defined number */
				/**< of frames at input, with varying */
				/**< number of frames at output. */
				/**< In pull mode the ASRC outputs */
				/**< a defined number of frames while */
				/**< number of input frames varies. */

They are clearly different from what is suggested above with a 'ratio-mod'.

Same if you have a 'simple EQ'. there are dozens of ways to implement
the functionality with FIR, IIR or a combination of the two, and
multiple bands.

The point is that you have to think upfront about a generic way to pass
parameters. We didn't have to do it for encoders/decoders because we
only catered to well-documented standard solutions only. By choosing to
support PCM processing, a new can of worms is now open.

I repeat: please do not make the mistake of listing all processing with
an enum and a new structure for parameters every time someone needs a
specific transform in their pipeline. We made that mistake with SOF and
had to backtrack rather quickly. The only way to scale is an identifier
that is NOT included in the kernel code but is known to higher and
lower-levels only.
Jaroslav Kysela Aug. 8, 2024, 3:51 p.m. UTC | #7
On 08. 08. 24 14:19, Pierre-Louis Bossart wrote:
>   files changed, 10 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/
>>>>>> sound/compress_offload.h
>>>>>> index 98772b0cbcb7..8b2b72f94e26 100644
>>>>>> --- a/include/uapi/sound/compress_offload.h
>>>>>> +++ b/include/uapi/sound/compress_offload.h
>>>>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
>>>>>>     * end of the track
>>>>>>     * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the
>>>>>> encoder at the
>>>>>>     * beginning of the track
>>>>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for
>>>>>> sample rate converter
>>>>>>     */
>>>>>>    enum sndrv_compress_encoder {
>>>>>>         SNDRV_COMPRESS_ENCODER_PADDING = 1,
>>>>>>         SNDRV_COMPRESS_ENCODER_DELAY = 2,
>>>>>> +     SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
>>>>>>    };
>>>>>
>>>>> this sounds wrong to me. The sample rate converter is not an "encoder",
>>>>> and the properties for padding/delay are totally specific to an encoder
>>>>> function.
>>>>
>>>> There is only decoder and encoder definition for compress,  I know
>>>> it is difficult to add SRC to encoder or decoder classification,
>>>> SRC is a Post Processing.  I hope you can have a recommendation
>>>
>>> I don't. I think we're blurring layers in a really odd way.
>>>
>>> The main reason why the compress API was added is to remove the
>>> byte-to-time conversions. But that's clearly not useful for a
>>> post-processing of PCM data, where the bitrate is constant. It really
>>> feels like we're adding this memory-to-memory API to the compress
>>> framework because we don't have anything else available, not because it
>>> makes sense to do so.
>>
>> It makes sense to offload decoder/encoder tasks as batch processing for
>> standard compress stream and return back result (PCM stream or encoded
>> stream) to user space. So it makes sense to use the compress interface
>> (parameters handshake) for it. Let's talk about the proper SRC extension.
>>
>> For SRC and dynamic rate modification. I would just create an ALSA
>> control for this. We are already using the "PCM Rate Shift 100000"
>> control in the sound/drivers/aloop.c for this purpose (something like
>> pitch in MIDI) for example. There is no requirement to add this function
>> through metadata ioctls. As bonus, this control can be monitored with
>> multiple tasks.
> 
> this wouldn't work when the rate is estimated in firmware/hardware,
> which is precisely what the 'asynchronous' part of ASRC does.
> 
> 
>>> Then there's the issue of parameters, we chose to only add parameters
>>> for standard encoders/decoders. Post-processing is highly specific and
>>> the parameter definitions varies from one implementation to another -
>>> and usually parameters are handled in an opaque way with binary
>>> controls. This is best handled with a UUID that needs to be known only
>>> to applications and low-level firmware/hardware, the kernel code should
>>> not have to be modified for each and every processing and to add new
>>> parameters. It just does not scale and it's unmaintainable.
>>>
>>> At the very least if you really want to use this compress API, extend it
>>> to use a non-descript "UUID-defined" type and an opaque set of
>>> parameters with this UUID passed in a header.
>>
>> We don't need to use UUID-defined scheme for simple (A)SRC
>> implementation. As I noted, the specific runtime controls may use
>> existing ALSA control API.
> 
> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the
> performance, and how the drift estimator is handled. There's nothing
> simple if you look under the hood. The SOF implementation has for
> example those parameters:
> 
> uint32_t source_rate;           /**< Define fixed source rate or */
> 				/**< use 0 to indicate need to get */
> 				/**< the rate from stream */
> uint32_t sink_rate;             /**< Define fixed sink rate or */
> 				/**< use 0 to indicate need to get */
> 				/**< the rate from stream */
> uint32_t asynchronous_mode;     /**< synchronous 0, asynchronous 1 */
> 				/**< When 1 the ASRC tracks and */
> 				/**< compensates for drift. */
> uint32_t operation_mode;        /**< push 0, pull 1, In push mode the */
> 				/**< ASRC consumes a defined number */
> 				/**< of frames at input, with varying */
> 				/**< number of frames at output. */
> 				/**< In pull mode the ASRC outputs */
> 				/**< a defined number of frames while */
> 				/**< number of input frames varies. */
> 
> They are clearly different from what is suggested above with a 'ratio-mod'.

I don't think so. The proposed (A)SRC for compress-accel is just one case for 
the above configs where the input is known and output is controlled by the 
requested rate. The I/O mechanism is abstracted enough in this case and the 
driver/hardware/firmware must follow it.

> Same if you have a 'simple EQ'. there are dozens of ways to implement
> the functionality with FIR, IIR or a combination of the two, and
> multiple bands.
> 
> The point is that you have to think upfront about a generic way to pass
> parameters. We didn't have to do it for encoders/decoders because we
> only catered to well-documented standard solutions only. By choosing to
> support PCM processing, a new can of worms is now open.
> 
> I repeat: please do not make the mistake of listing all processing with
> an enum and a new structure for parameters every time someone needs a
> specific transform in their pipeline. We made that mistake with SOF and
> had to backtrack rather quickly. The only way to scale is an identifier
> that is NOT included in the kernel code but is known to higher and
> lower-levels only.

There are two ways - black box (UUID - as you suggested) - or well defined 
purpose (abstraction). For your example 'simple EQ', the parameters should be 
the band (frequency range) volume values. It's abstract and the real filters 
(resp. implementation) used behind may depend on the hardware/driver capabilities.

 From my view, the really special cases may be handled as black box, but 
others like (A)SRC should follow some well-defined abstraction IMHO to not 
force user space to handle all special cases.

						Jaroslav
Pierre-Louis Bossart Aug. 9, 2024, 7:19 a.m. UTC | #8
>>>> Then there's the issue of parameters, we chose to only add parameters
>>>> for standard encoders/decoders. Post-processing is highly specific and
>>>> the parameter definitions varies from one implementation to another -
>>>> and usually parameters are handled in an opaque way with binary
>>>> controls. This is best handled with a UUID that needs to be known only
>>>> to applications and low-level firmware/hardware, the kernel code should
>>>> not have to be modified for each and every processing and to add new
>>>> parameters. It just does not scale and it's unmaintainable.
>>>>
>>>> At the very least if you really want to use this compress API,
>>>> extend it
>>>> to use a non-descript "UUID-defined" type and an opaque set of
>>>> parameters with this UUID passed in a header.
>>>
>>> We don't need to use UUID-defined scheme for simple (A)SRC
>>> implementation. As I noted, the specific runtime controls may use
>>> existing ALSA control API.
>>
>> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the
>> performance, and how the drift estimator is handled. There's nothing
>> simple if you look under the hood. The SOF implementation has for
>> example those parameters:
>>
>> uint32_t source_rate;           /**< Define fixed source rate or */
>>                 /**< use 0 to indicate need to get */
>>                 /**< the rate from stream */
>> uint32_t sink_rate;             /**< Define fixed sink rate or */
>>                 /**< use 0 to indicate need to get */
>>                 /**< the rate from stream */
>> uint32_t asynchronous_mode;     /**< synchronous 0, asynchronous 1 */
>>                 /**< When 1 the ASRC tracks and */
>>                 /**< compensates for drift. */
>> uint32_t operation_mode;        /**< push 0, pull 1, In push mode the */
>>                 /**< ASRC consumes a defined number */
>>                 /**< of frames at input, with varying */
>>                 /**< number of frames at output. */
>>                 /**< In pull mode the ASRC outputs */
>>                 /**< a defined number of frames while */
>>                 /**< number of input frames varies. */
>>
>> They are clearly different from what is suggested above with a 'ratio-
>> mod'.
> 
> I don't think so. The proposed (A)SRC for compress-accel is just one
> case for the above configs where the input is known and output is
> controlled by the requested rate. The I/O mechanism is abstracted enough
> in this case and the driver/hardware/firmware must follow it.

ASRC is usually added when the nominal rates are known but the clock
sources differ and the drift needs to be estimated at run-time and the
coefficients or interpolation modified dynamically

If the ratio is known exactly and there's no clock drift, then it's a
different problem where the filter coefficients are constant.

>> Same if you have a 'simple EQ'. there are dozens of ways to implement
>> the functionality with FIR, IIR or a combination of the two, and
>> multiple bands.
>>
>> The point is that you have to think upfront about a generic way to pass
>> parameters. We didn't have to do it for encoders/decoders because we
>> only catered to well-documented standard solutions only. By choosing to
>> support PCM processing, a new can of worms is now open.
>>
>> I repeat: please do not make the mistake of listing all processing with
>> an enum and a new structure for parameters every time someone needs a
>> specific transform in their pipeline. We made that mistake with SOF and
>> had to backtrack rather quickly. The only way to scale is an identifier
>> that is NOT included in the kernel code but is known to higher and
>> lower-levels only.
> 
> There are two ways - black box (UUID - as you suggested) - or well
> defined purpose (abstraction). For your example 'simple EQ', the
> parameters should be the band (frequency range) volume values. It's
> abstract and the real filters (resp. implementation) used behind may
> depend on the hardware/driver capabilities.

Indeed there is a possibility that the parameters are high-level, but
that would require firmware or hardware to be able to generate actual
coefficients from those parameters. That usually requires some advanced
math which isn't necessarily obvious to implement with fixed-point hardware.

> From my view, the really special cases may be handled as black box, but
> others like (A)SRC should follow some well-defined abstraction IMHO to
> not force user space to handle all special cases.

I am not against the high-level abstractions, e.g. along the lines of
what Android defined:
https://developer.android.com/reference/android/media/audiofx/AudioEffect

That's not sufficient however, we also need to make sure there's an
ability to provide pre-computed coefficients in an opaque manner for
processing that doesn't fit in the well-defined cases. In practice there
are very few 3rd party IP that fits in well-defined cases, everyone has
secret-sauce parameters and options.
Shengjiu Wang Aug. 9, 2024, 10:14 a.m. UTC | #9
On Fri, Aug 9, 2024 at 3:25 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
> >>>> Then there's the issue of parameters, we chose to only add parameters
> >>>> for standard encoders/decoders. Post-processing is highly specific and
> >>>> the parameter definitions varies from one implementation to another -
> >>>> and usually parameters are handled in an opaque way with binary
> >>>> controls. This is best handled with a UUID that needs to be known only
> >>>> to applications and low-level firmware/hardware, the kernel code should
> >>>> not have to be modified for each and every processing and to add new
> >>>> parameters. It just does not scale and it's unmaintainable.
> >>>>
> >>>> At the very least if you really want to use this compress API,
> >>>> extend it
> >>>> to use a non-descript "UUID-defined" type and an opaque set of
> >>>> parameters with this UUID passed in a header.
> >>>
> >>> We don't need to use UUID-defined scheme for simple (A)SRC
> >>> implementation. As I noted, the specific runtime controls may use
> >>> existing ALSA control API.
> >>
> >> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the
> >> performance, and how the drift estimator is handled. There's nothing
> >> simple if you look under the hood. The SOF implementation has for
> >> example those parameters:
> >>
> >> uint32_t source_rate;           /**< Define fixed source rate or */
> >>                 /**< use 0 to indicate need to get */
> >>                 /**< the rate from stream */
> >> uint32_t sink_rate;             /**< Define fixed sink rate or */
> >>                 /**< use 0 to indicate need to get */
> >>                 /**< the rate from stream */
> >> uint32_t asynchronous_mode;     /**< synchronous 0, asynchronous 1 */
> >>                 /**< When 1 the ASRC tracks and */
> >>                 /**< compensates for drift. */
> >> uint32_t operation_mode;        /**< push 0, pull 1, In push mode the */
> >>                 /**< ASRC consumes a defined number */
> >>                 /**< of frames at input, with varying */
> >>                 /**< number of frames at output. */
> >>                 /**< In pull mode the ASRC outputs */
> >>                 /**< a defined number of frames while */
> >>                 /**< number of input frames varies. */
> >>
> >> They are clearly different from what is suggested above with a 'ratio-
> >> mod'.
> >
> > I don't think so. The proposed (A)SRC for compress-accel is just one
> > case for the above configs where the input is known and output is
> > controlled by the requested rate. The I/O mechanism is abstracted enough
> > in this case and the driver/hardware/firmware must follow it.
>
> ASRC is usually added when the nominal rates are known but the clock
> sources differ and the drift needs to be estimated at run-time and the
> coefficients or interpolation modified dynamically
>
> If the ratio is known exactly and there's no clock drift, then it's a
> different problem where the filter coefficients are constant.
>
> >> Same if you have a 'simple EQ'. there are dozens of ways to implement
> >> the functionality with FIR, IIR or a combination of the two, and
> >> multiple bands.
> >>
> >> The point is that you have to think upfront about a generic way to pass
> >> parameters. We didn't have to do it for encoders/decoders because we
> >> only catered to well-documented standard solutions only. By choosing to
> >> support PCM processing, a new can of worms is now open.
> >>
> >> I repeat: please do not make the mistake of listing all processing with
> >> an enum and a new structure for parameters every time someone needs a
> >> specific transform in their pipeline. We made that mistake with SOF and
> >> had to backtrack rather quickly. The only way to scale is an identifier
> >> that is NOT included in the kernel code but is known to higher and
> >> lower-levels only.
> >
> > There are two ways - black box (UUID - as you suggested) - or well
> > defined purpose (abstraction). For your example 'simple EQ', the
> > parameters should be the band (frequency range) volume values. It's
> > abstract and the real filters (resp. implementation) used behind may
> > depend on the hardware/driver capabilities.
>
> Indeed there is a possibility that the parameters are high-level, but
> that would require firmware or hardware to be able to generate actual
> coefficients from those parameters. That usually requires some advanced
> math which isn't necessarily obvious to implement with fixed-point hardware.
>
> > From my view, the really special cases may be handled as black box, but
> > others like (A)SRC should follow some well-defined abstraction IMHO to
> > not force user space to handle all special cases.
>
> I am not against the high-level abstractions, e.g. along the lines of
> what Android defined:
> https://developer.android.com/reference/android/media/audiofx/AudioEffect
>
> That's not sufficient however, we also need to make sure there's an
> ability to provide pre-computed coefficients in an opaque manner for
> processing that doesn't fit in the well-defined cases. In practice there
> are very few 3rd party IP that fits in well-defined cases, everyone has
> secret-sauce parameters and options.

Appreciate the discussion.

Let me explain the reason for the change:

The reason I use the metadata ioctl is that ALSA controls are bound
to the sound card.  What I want is for the controls to be bound to
snd_compr_stream, because the ASRC compress sound card can
support multiple instances (the ASRC can perform multiple conversions in
parallel).   The ALSA controls can't be used for this case; the only
choice in the current compress API is the metadata ioctl. And the metadata
ioctl can be called many times, which meets the ratio modifier
requirement (the ratio may drift on the fly).

And the compress API uses the codec as the unit for capability query and
parameter setting, so I think we need to define 'SND_AUDIOCODEC_SRC'
and 'struct snd_dec_src'; 'snd_dec_src' just defines the output
format and output rate, and the channels definition just reuses snd_codec.ch_in.

I understand your concern, but there seems to be no better option.
If you have one, please guide me. Thanks.

Best regards
Shengjiu Wang
Pierre-Louis Bossart Aug. 9, 2024, 12:52 p.m. UTC | #10
> Why I use the metadata ioctl is because the ALSA controls are binding
> to the sound card.  What I want is the controls can be bound to
> snd_compr_stream, because the ASRC compress sound card can
> support multi instances ( the ASRC can support multi conversion in
> parallel).   The ALSA controls can't be used for this case,  the only
> choice in current compress API is metadata ioctl. 

I don't know if there is really a technical limitation for this, this is
for Jaroslav to comment. I am not sure why it would be a problem to e.g.
have a volume control prior to an encoder or after a decoder.

> And metadata
> ioctl can be called many times which can meet the ratio modifier
> requirement (ratio may be drift on the fly)

Interesting, that's yet another way of handling the drift with userspace
modifying the ratio dynamically. That's different to what I've seen before.

> And compress API uses codec as the unit for capability query and
> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
> format and output rate, channels definition just reuse the snd_codec.ch_in.

The capability query is an interesting point as well, it's not clear how
to expose to userspace what this specific implementation can do, while
at the same time *requiring* userspace to update the ratio dynamically.
For something like this to work, userspace needs to have pre-existing
information on how the SRC works.
Jaroslav Kysela Aug. 9, 2024, 1:51 p.m. UTC | #11
On 09. 08. 24 12:14, Shengjiu Wang wrote:
> On Fri, Aug 9, 2024 at 3:25 PM Pierre-Louis Bossart
> <pierre-louis.bossart@linux.intel.com> wrote:
>>
>>
>>>>>> Then there's the issue of parameters, we chose to only add parameters
>>>>>> for standard encoders/decoders. Post-processing is highly specific and
>>>>>> the parameter definitions varies from one implementation to another -
>>>>>> and usually parameters are handled in an opaque way with binary
>>>>>> controls. This is best handled with a UUID that needs to be known only
>>>>>> to applications and low-level firmware/hardware, the kernel code should
>>>>>> not have to be modified for each and every processing and to add new
>>>>>> parameters. It just does not scale and it's unmaintainable.
>>>>>>
>>>>>> At the very least if you really want to use this compress API,
>>>>>> extend it
>>>>>> to use a non-descript "UUID-defined" type and an opaque set of
>>>>>> parameters with this UUID passed in a header.
>>>>>
>>>>> We don't need to use UUID-defined scheme for simple (A)SRC
>>>>> implementation. As I noted, the specific runtime controls may use
>>>>> existing ALSA control API.
>>>>
>>>> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the
>>>> performance, and how the drift estimator is handled. There's nothing
>>>> simple if you look under the hood. The SOF implementation has for
>>>> example those parameters:
>>>>
>>>> uint32_t source_rate;           /**< Define fixed source rate or */
>>>>                  /**< use 0 to indicate need to get */
>>>>                  /**< the rate from stream */
>>>> uint32_t sink_rate;             /**< Define fixed sink rate or */
>>>>                  /**< use 0 to indicate need to get */
>>>>                  /**< the rate from stream */
>>>> uint32_t asynchronous_mode;     /**< synchronous 0, asynchronous 1 */
>>>>                  /**< When 1 the ASRC tracks and */
>>>>                  /**< compensates for drift. */
>>>> uint32_t operation_mode;        /**< push 0, pull 1, In push mode the */
>>>>                  /**< ASRC consumes a defined number */
>>>>                  /**< of frames at input, with varying */
>>>>                  /**< number of frames at output. */
>>>>                  /**< In pull mode the ASRC outputs */
>>>>                  /**< a defined number of frames while */
>>>>                  /**< number of input frames varies. */
>>>>
>>>> They are clearly different from what is suggested above with a 'ratio-
>>>> mod'.
>>>
>>> I don't think so. The proposed (A)SRC for compress-accel is just one
>>> case for the above configs where the input is known and output is
>>> controlled by the requested rate. The I/O mechanism is abstracted enough
>>> in this case and the driver/hardware/firmware must follow it.
>>
>> ASRC is usually added when the nominal rates are known but the clock
>> sources differ and the drift needs to be estimated at run-time and the
>> coefficients or interpolation modified dynamically
>>
>> If the ratio is known exactly and there's no clock drift, then it's a
>> different problem where the filter coefficients are constant.
>>
>>>> Same if you have a 'simple EQ'. there are dozens of ways to implement
>>>> the functionality with FIR, IIR or a combination of the two, and
>>>> multiple bands.
>>>>
>>>> The point is that you have to think upfront about a generic way to pass
>>>> parameters. We didn't have to do it for encoders/decoders because we
>>>> only catered to well-documented standard solutions only. By choosing to
>>>> support PCM processing, a new can of worms is now open.
>>>>
>>>> I repeat: please do not make the mistake of listing all processing with
>>>> an enum and a new structure for parameters every time someone needs a
>>>> specific transform in their pipeline. We made that mistake with SOF and
>>>> had to backtrack rather quickly. The only way to scale is an identifier
>>>> that is NOT included in the kernel code but is known to higher and
>>>> lower-levels only.
>>>
>>> There are two ways - black box (UUID - as you suggested) - or well
>>> defined purpose (abstraction). For your example 'simple EQ', the
>>> parameters should be the band (frequency range) volume values. It's
>>> abstract and the real filters (resp. implementation) used behind may
>>> depend on the hardware/driver capabilities.
>>
>> Indeed there is a possibility that the parameters are high-level, but
>> that would require firmware or hardware to be able to generate actual
>> coefficients from those parameters. That usually requires some advanced
>> math which isn't necessarily obvious to implement with fixed-point hardware.
>>
>>>  From my view, the really special cases may be handled as black box, but
>>> others like (A)SRC should follow some well-defined abstraction IMHO to
>>> not force user space to handle all special cases.
>>
>> I am not against the high-level abstractions, e.g. along the lines of
>> what Android defined:
>> https://developer.android.com/reference/android/media/audiofx/AudioEffect
>>
>> That's not sufficient however, we also need to make sure there's an
>> ability to provide pre-computed coefficients in an opaque manner for
>> processing that doesn't fit in the well-defined cases. In practice there
>> are very few 3rd party IP that fits in well-defined cases, everyone has
>> secret-sauce parameters and options.
> 
> Appreciate the discussion.
> 
> Let me explain the reason for the change:
> 
> Why I use the metadata ioctl is because the ALSA controls are binding
> to the sound card.  What I want is the controls can be bound to
> snd_compr_stream, because the ASRC compress sound card can
> support multi instances ( the ASRC can support multi conversion in
> parallel).   The ALSA controls can't be used for this case,  the only
> choice in current compress API is metadata ioctl. And metadata
> ioctl can be called many times which can meet the ratio modifier
> requirement (ratio may be drift on the fly)

This argument is not valid. The controls are bound to the card, but the 
element identifiers have already iface (interface), device and subdevice 
numbers. We are using controls for PCM devices for example. The binding is 
straight.

Just add SNDRV_CTL_ELEM_IFACE_COMPRESS define and specify the compress device 
number in the 'struct snd_ctl_elem_id'.

					Jaroslav
Jaroslav Kysela Aug. 9, 2024, 2 p.m. UTC | #12
On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:

>> And metadata
>> ioctl can be called many times which can meet the ratio modifier
>> requirement (ratio may be drift on the fly)
> 
> Interesting, that's yet another way of handling the drift with userspace
> modifying the ratio dynamically. That's different to what I've seen before.

Note that the "timing" is managed by the user space with this scheme.

>> And compress API uses codec as the unit for capability query and
>> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
>> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
>> format and output rate, channels definition just reuse the snd_codec.ch_in.
> 
> The capability query is an interesting point as well, it's not clear how
> to expose to userspace what this specific implementation can do, while
> at the same time *requiring* userpace to update the ratio dynamically.
> For something like this to work, userspace needs to have pre-existing
> information on how the SRC works.

Yes, it's about abstraction. The user space wants to push data, read data back 
converted to the target rate, and eventually modify the drift using a control 
managing clocks in its own way. We can assume that if this control 
does not exist, the drift cannot be controlled. Also, a nice thing is that the 
control has min and max values (a range), so the driver can specify the drift 
range, too.

And again, look to "PCM Rate Shift 100000" control implementation in 
sound/drivers/aloop.c. It would be nice to have the base offset for the 
shift/drift/pitch value standardized.

					Jaroslav
Pierre-Louis Bossart Aug. 9, 2024, 7:20 p.m. UTC | #13
>>> And metadata
>>> ioctl can be called many times which can meet the ratio modifier
>>> requirement (ratio may be drift on the fly)
>>
>> Interesting, that's yet another way of handling the drift with userspace
>> modifying the ratio dynamically. That's different to what I've seen
>> before.
> 
> Note that the "timing" is managed by the user space with this scheme.
> 
>>> And compress API uses codec as the unit for capability query and
>>> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
>>> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
>>> format and output rate, channels definition just reuse the
>>> snd_codec.ch_in.
>>
>> The capability query is an interesting point as well, it's not clear how
>> to expose to userspace what this specific implementation can do, while
>> at the same time *requiring* userpace to update the ratio dynamically.
>> For something like this to work, userspace needs to have pre-existing
>> information on how the SRC works.
> 
> Yes, it's about abstraction. The user space wants to push data, read
> data back converted to the target rate and eventually modify the drift
> using a control managing clocks using own way. We can eventually assume,
> that if this control does not exist, the drift cannot be controlled.
> Also, nice thing is that the control has min and max values (range), so
> driver can specify the drift range, too.

This mode of operation would be fine, but if you add the SRC as part of
the processing allowed in a compressed stream, it might be used in a
'regular' real-time pipeline and the arguments  and implementation could
be very different.

In other words, this SRC support looks like an extension of the compress
API for a very narrow usage restricted to the memory-to-memory case
only. I worry a bit about the next steps... Are there going to be other
types of PCM processing like this, and if yes, how would we know if they
are intended to be used for the 'regular' compress API or for the
memory-to-memory scheme?
Shengjiu Wang Aug. 12, 2024, 10:24 a.m. UTC | #14
On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote:
>
> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:
>
> >> And metadata
> >> ioctl can be called many times which can meet the ratio modifier
> >> requirement (ratio may be drift on the fly)
> >
> > Interesting, that's yet another way of handling the drift with userspace
> > modifying the ratio dynamically. That's different to what I've seen before.
>
> Note that the "timing" is managed by the user space with this scheme.
>
> >> And compress API uses codec as the unit for capability query and
> >> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
> >> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
> >> format and output rate, channels definition just reuse the snd_codec.ch_in.
> >
> > The capability query is an interesting point as well, it's not clear how
> > to expose to userspace what this specific implementation can do, while
> > at the same time *requiring* userpace to update the ratio dynamically.
> > For something like this to work, userspace needs to have pre-existing
> > information on how the SRC works.
>
> Yes, it's about abstraction. The user space wants to push data, read data back
> converted to the target rate and eventually modify the drift using a control
> managing clocks using own way. We can eventually assume, that if this control
> does not exist, the drift cannot be controlled. Also, nice thing is that the
> control has min and max values (range), so driver can specify the drift range,
> too.
>
> And again, look to "PCM Rate Shift 100000" control implementation in
> sound/drivers/aloop.c. It would be nice to have the base offset for the
> shift/drift/pitch value standardized.

Thanks.

But the ASRC driver I implemented is different: I just register one sound
card with one device/subdevice, but the ASRC hardware supports 4 instances
together, so the user can open the card device 4 times to create 4 instances;
the controls can then only be bound to compress streams.

I think I can remove 'SNDRV_COMPRESS_SRC_RATIO_MOD' and
only define a private type for the driver, which means only the ASRC driver
and its user application know the type.

For the changes in 'include/uapi/sound/compress_params.h', should I
keep them? Is there any other suggestion for them?

Best regards
Shengjiu Wang

>
>                                         Jaroslav
>
> --
> Jaroslav Kysela <perex@perex.cz>
> Linux Sound Maintainer; ALSA Project; Red Hat, Inc.
>
Jaroslav Kysela Aug. 12, 2024, 1:31 p.m. UTC | #15
On 12. 08. 24 12:24, Shengjiu Wang wrote:
> On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote:
>>
>> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:
>>
>>>> And metadata
>>>> ioctl can be called many times which can meet the ratio modifier
>>>> requirement (ratio may be drift on the fly)
>>>
>>> Interesting, that's yet another way of handling the drift with userspace
>>> modifying the ratio dynamically. That's different to what I've seen before.
>>
>> Note that the "timing" is managed by the user space with this scheme.
>>
>>>> And compress API uses codec as the unit for capability query and
>>>> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
>>>> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
>>>> format and output rate, channels definition just reuse the snd_codec.ch_in.
>>>
>>> The capability query is an interesting point as well, it's not clear how
>>> to expose to userspace what this specific implementation can do, while
>>> at the same time *requiring* userpace to update the ratio dynamically.
>>> For something like this to work, userspace needs to have pre-existing
>>> information on how the SRC works.
>>
>> Yes, it's about abstraction. The user space wants to push data, read data back
>> converted to the target rate and eventually modify the drift using a control
>> managing clocks using own way. We can eventually assume, that if this control
>> does not exist, the drift cannot be controlled. Also, nice thing is that the
>> control has min and max values (range), so driver can specify the drift range,
>> too.
>>
>> And again, look to "PCM Rate Shift 100000" control implementation in
>> sound/drivers/aloop.c. It would be nice to have the base offset for the
>> shift/drift/pitch value standardized.
> 
> Thanks.
> 
> But the ASRC driver I implemented is different, I just register one sound
> card, one device/subdevice.  but the ASRC hardware support 4 instances
> together, so user can open the card device 4 times to create 4 instances
> then the controls can only bind with compress streams.

It's just a reason to add the subdevice code for the compress offload layer 
like we have in other APIs for overall consistency. I'll try to work on this.

> I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD',

Yes.

> Only define a private type for driver,  which means only the ASRC driver
> and its user application know the type.

The control API should be used for this IMHO.

> For the change in 'include/uapi/sound/compress_params.h",  should I
> keep them,  is there any other suggestion for them?

I don't see a problem there.

					Jaroslav
Pierre-Louis Bossart Aug. 12, 2024, 1:43 p.m. UTC | #16
On 8/12/24 15:31, Jaroslav Kysela wrote:
> On 12. 08. 24 12:24, Shengjiu Wang wrote:
>> On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote:
>>>
>>> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:
>>>
>>>>> And metadata
>>>>> ioctl can be called many times which can meet the ratio modifier
>>>>> requirement (ratio may be drift on the fly)
>>>>
>>>> Interesting, that's yet another way of handling the drift with
>>>> userspace
>>>> modifying the ratio dynamically. That's different to what I've seen
>>>> before.
>>>
>>> Note that the "timing" is managed by the user space with this scheme.
>>>
>>>>> And compress API uses codec as the unit for capability query and
>>>>> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
>>>>> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
>>>>> format and output rate, channels definition just reuse the
>>>>> snd_codec.ch_in.
>>>>
>>>> The capability query is an interesting point as well, it's not clear
>>>> how
>>>> to expose to userspace what this specific implementation can do, while
>>>> at the same time *requiring* userpace to update the ratio dynamically.
>>>> For something like this to work, userspace needs to have pre-existing
>>>> information on how the SRC works.
>>>
>>> Yes, it's about abstraction. The user space wants to push data, read
>>> data back
>>> converted to the target rate and eventually modify the drift using a
>>> control
>>> managing clocks using own way. We can eventually assume, that if this
>>> control
>>> does not exist, the drift cannot be controlled. Also, nice thing is
>>> that the
>>> control has min and max values (range), so driver can specify the
>>> drift range,
>>> too.
>>>
>>> And again, look to "PCM Rate Shift 100000" control implementation in
>>> sound/drivers/aloop.c. It would be nice to have the base offset for the
>>> shift/drift/pitch value standardized.
>>
>> Thanks.
>>
>> But the ASRC driver I implemented is different, I just register one sound
>> card, one device/subdevice.  but the ASRC hardware support 4 instances
>> together, so user can open the card device 4 times to create 4 instances
>> then the controls can only bind with compress streams.
> 
> It's just a reason to add the subdevice code for the compress offload
> layer like we have in other APIs for overall consistency. I'll try to
> work on this.

I thought this was supported already? I remember there was a request to
enable more than one compressed stream for enhanced cross-fade support
with different formats? That isn't supported with the single-device +
PARTIAL_DRAIN method.

Vinod?

>> I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD',
> 
> Yes.
> 
>> Only define a private type for driver,  which means only the ASRC driver
>> and its user application know the type.
> 
> The control API should be used for this IMHO.

Agree, this would be a 'clean' split where the compress API is used for
the data parts and the control parts used otherwise to alter the ratio
or whatever else is needed.

>> For the change in 'include/uapi/sound/compress_params.h",  should I
>> keep them,  is there any other suggestion for them?

You can add the SRC type but if you use a control for the parameters you
don't need to add anything for the encoder options, do you?
Shengjiu Wang Aug. 14, 2024, 2:22 a.m. UTC | #17
On Mon, Aug 12, 2024 at 9:44 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
>
> On 8/12/24 15:31, Jaroslav Kysela wrote:
> > On 12. 08. 24 12:24, Shengjiu Wang wrote:
> >> On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote:
> >>>
> >>> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:
> >>>
> >>>>> And metadata
> >>>>> ioctl can be called many times which can meet the ratio modifier
> >>>>> requirement (ratio may be drift on the fly)
> >>>>
> >>>> Interesting, that's yet another way of handling the drift with
> >>>> userspace
> >>>> modifying the ratio dynamically. That's different to what I've seen
> >>>> before.
> >>>
> >>> Note that the "timing" is managed by the user space with this scheme.
> >>>
> >>>>> And compress API uses codec as the unit for capability query and
> >>>>> parameter setting,  So I think need to define "SND_AUDIOCODEC_SRC'
> >>>>> and 'struct snd_dec_src',  for the 'snd_dec_src' just defined output
> >>>>> format and output rate, channels definition just reuse the
> >>>>> snd_codec.ch_in.
> >>>>
> >>>> The capability query is an interesting point as well, it's not clear
> >>>> how
> >>>> to expose to userspace what this specific implementation can do, while
> >>>> at the same time *requiring* userpace to update the ratio dynamically.
> >>>> For something like this to work, userspace needs to have pre-existing
> >>>> information on how the SRC works.
> >>>
> >>> Yes, it's about abstraction. The user space wants to push data, read
> >>> data back
> >>> converted to the target rate and eventually modify the drift using a
> >>> control
> >>> managing clocks using own way. We can eventually assume, that if this
> >>> control
> >>> does not exist, the drift cannot be controlled. Also, nice thing is
> >>> that the
> >>> control has min and max values (range), so driver can specify the
> >>> drift range,
> >>> too.
> >>>
> >>> And again, look to "PCM Rate Shift 100000" control implementation in
> >>> sound/drivers/aloop.c. It would be nice to have the base offset for the
> >>> shift/drift/pitch value standardized.
> >>
> >> Thanks.
> >>
> >> But the ASRC driver I implemented is different, I just register one sound
> >> card, one device/subdevice.  but the ASRC hardware support 4 instances
> >> together, so user can open the card device 4 times to create 4 instances
> >> then the controls can only bind with compress streams.
> >
> > It's just a reason to add the subdevice code for the compress offload
> > layer like we have in other APIs for overall consistency. I'll try to
> > work on this.
>
> I thought this was supported already? I remember there was a request to
> enable more than one compressed stream for enhanced cross-fade support
> with different formats? That isn't supported with the single-device +
> PARTIAL_DRAIN method.
>
> Vinod?
>
> >> I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD',
> >
> > Yes.
> >
> >> Only define a private type for driver,  which means only the ASRC driver
> >> and its user application know the type.
> >
> > The control API should be used for this IMHO.
>
> Agree, this would be a 'clean' split where the compress API is used for
> the data parts and the control parts used otherwise to alter the ratio
> or whatever else is needed.
>
> >> For the change in 'include/uapi/sound/compress_params.h",  should I
> >> keep them,  is there any other suggestion for them?
>
> You can add the SRC type but if you use a control for the parameters you
> don't need to add anything for the encoder options, do you?
>

Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
the SRC type will be dropped.

But my understanding of the control means the .set_metadata() API, right?
As I said, the output rate, output format, and ratio modifier are applied to
the instances of ASRC,  which is the snd_compr_stream in driver.
so only the .set_metadata() API can be used for these purposes.

Best regards
Shengjiu Wang
Pierre-Louis Bossart Aug. 14, 2024, 9:40 a.m. UTC | #18
> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
> the SRC type will be dropped.

sounds good.

> But my understanding of the control means the .set_metadata() API, right?
> As I said, the output rate, output format, and ratio modifier are applied to
> the instances of ASRC,  which is the snd_compr_stream in driver.
> so only the .set_metadata() API can be used for these purposes.

Humm, this is more controversial.

The term 'metadata' really referred to known information present in
headers or additional ID3 tags and not in the compressed file itself.
The .set_metadata was assumed to be called ONCE before decoding.

But here you have a need to update the ratio modifier on a regular basis
to compensate for the drift. This isn't what this specific callback was
designed for. We could change and allow this callback to be used
multiple times, but then this could create problems for existing
implementations which cannot deal with modified metadata on the fly.

And then there's the problem of defining a 'key' for the metadata. the
definition of the key is a u32, so there's plenty of space for different
implementations, but a collision is possible. We'd need an agreement on
how to allocate keys to different solutions without changing the header
file for every implementation.

It sounds like we'd need a 'runtime params' callback - unless there's a
better trick to tie the control and compress layers?
Shengjiu Wang Aug. 14, 2024, 11:12 a.m. UTC | #19
On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
> > Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
> > the SRC type will be dropped.
>
> sounds good.
>
> > But my understanding of the control means the .set_metadata() API, right?
> > As I said, the output rate, output format, and ratio modifier are applied to
> > the instances of ASRC,  which is the snd_compr_stream in driver.
> > so only the .set_metadata() API can be used for these purposes.
>
> Humm, this is more controversial.
>
> The term 'metadata' really referred to known information present in
> headers or additional ID3 tags and not in the compressed file itself.
> The .set_metadata was assumed to be called ONCE before decoding.
>
> But here you have a need to update the ratio modifier on a regular basis
> to compensate for the drift. This isn't what this specific callback was
> designed for. We could change and allow this callback to be used
> multiple times, but then this could create problems for existing
> implementations which cannot deal with modified metadata on the fly.

.set_metadata can be called multiple times now; no change is needed currently.

>
> And then there's the problem of defining a 'key' for the metadata. the
> definition of the key is a u32, so there's plenty of space for different
> implementations, but a collision is possible. We'd need an agreement on
> how to allocate keys to different solutions without changing the header
> file for every implementation.

Can we define a private space for each case?   For example the key larger
than 0x80000000 is private, each driver can define it by themself?

>
> It sounds like we'd need a 'runtime params' callback - unless there's a
> better trick to tie the control and compress layers?
>
Pierre-Louis Bossart Aug. 14, 2024, 11:58 a.m. UTC | #20
On 8/14/24 13:12, Shengjiu Wang wrote:
> On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
> <pierre-louis.bossart@linux.intel.com> wrote:
>>
>>
>>> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
>>> the SRC type will be dropped.
>>
>> sounds good.
>>
>>> But my understanding of the control means the .set_metadata() API, right?
>>> As I said, the output rate, output format, and ratio modifier are applied to
>>> the instances of ASRC,  which is the snd_compr_stream in driver.
>>> so only the .set_metadata() API can be used for these purposes.
>>
>> Humm, this is more controversial.
>>
>> The term 'metadata' really referred to known information present in
>> headers or additional ID3 tags and not in the compressed file itself.
>> The .set_metadata was assumed to be called ONCE before decoding.
>>
>> But here you have a need to update the ratio modifier on a regular basis
>> to compensate for the drift. This isn't what this specific callback was
>> designed for. We could change and allow this callback to be used
>> multiple times, but then this could create problems for existing
>> implementations which cannot deal with modified metadata on the fly.
> 
> .set_metadata can be called multi times now, no need to change currently.

Not really, this set_metadata() callback is used only for gapless
transitions between tracks, see fcplay.c in tinycompress.

And now I am really confused because tinycompress uses an IOCTL directly:

	metadata.key = SNDRV_COMPRESS_ENCODER_PADDING;
	metadata.value[0] = mdata->encoder_padding;
	if (ioctl(compress->fd, SNDRV_COMPRESS_SET_METADATA, &metadata))

Whereas you want to use the ops callback directly from the control layer?

What would prevent a userspace program from using the ioctl directly
then? In that case, why do we need the control? I must be missing something.


>> And then there's the problem of defining a 'key' for the metadata. the
>> definition of the key is a u32, so there's plenty of space for different
>> implementations, but a collision is possible. We'd need an agreement on
>> how to allocate keys to different solutions without changing the header
>> file for every implementation.
> 
> Can we define a private space for each case?   For example the key larger
> than 0x80000000 is private, each driver can define it by themself?

that would be a possibility indeed - provided that the opens above are
straightened out.

>> It sounds like we'd need a 'runtime params' callback - unless there's a
>> better trick to tie the control and compress layers?
Jaroslav Kysela Aug. 14, 2024, 2:48 p.m. UTC | #21
On 14. 08. 24 13:58, Pierre-Louis Bossart wrote:
> 
> 
> On 8/14/24 13:12, Shengjiu Wang wrote:
>> On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
>> <pierre-louis.bossart@linux.intel.com> wrote:
>>>
>>>
>>>> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
>>>> the SRC type will be dropped.
>>>
>>> sounds good.
>>>
>>>> But my understanding of the control means the .set_metadata() API, right?
>>>> As I said, the output rate, output format, and ratio modifier are applied to
>>>> the instances of ASRC,  which is the snd_compr_stream in driver.
>>>> so only the .set_metadata() API can be used for these purposes.
>>>
>>> Humm, this is more controversial.
>>>
>>> The term 'metadata' really referred to known information present in
>>> headers or additional ID3 tags and not in the compressed file itself.
>>> The .set_metadata was assumed to be called ONCE before decoding.
>>>
>>> But here you have a need to update the ratio modifier on a regular basis
>>> to compensate for the drift. This isn't what this specific callback was
>>> designed for. We could change and allow this callback to be used
>>> multiple times, but then this could create problems for existing
>>> implementations which cannot deal with modified metadata on the fly.
>>
>> .set_metadata can be called multi times now, no need to change currently.
> 
> Not really, this set_metadata() callback is used only for gapless
> transitions between tracks, see fcplay.c in tinycompress.
> 
> And now I am really confused because tinycompress uses an IOCTL directly:
> 
> 	metadata.key = SNDRV_COMPRESS_ENCODER_PADDING;
> 	metadata.value[0] = mdata->encoder_padding;
> 	if (ioctl(compress->fd, SNDRV_COMPRESS_SET_METADATA, &metadata))
> 
> Whereas you want to use the ops callback directly from the control layer?
> 
> What would present a userspace program from using the ioctl directly
> then? In that case, why do we need the control? I must be missing something.

The whole discussion is which place is more appropriate for the runtime 
controls (like the frequency shift). My opinion is, if we have a layer for 
this which can be used for presence of those controls and even range / type / 
notifications, we should use it.

The new/updated ioctls, bound only to the active file descriptor, do not
allow those values to be monitored from outside.

>>> And then there's the problem of defining a 'key' for the metadata. the
>>> definition of the key is a u32, so there's plenty of space for different
>>> implementations, but a collision is possible. We'd need an agreement on
>>> how to allocate keys to different solutions without changing the header
>>> file for every implementation.
>>
>> Can we define a private space for each case?   For example the key larger
>> than 0x80000000 is private, each driver can define it by themself?
> 
> that would be a possibility indeed - provided that the opens above are
> straightened out.
> 
>>> It sounds like we'd need a 'runtime params' callback - unless there's a
>>> better trick to tie the control and compress layers?

I don't follow. If the compress driver code uses card/device/subdevice 
numbers, we can address the control properly. The problem is just that 
subdevice support is missing in the current compress code / API.

For me, the compress_params.h changes may also require paying attention to the 
encoding/decoding of the current compressed streams. So something like this 
may be more appropriate for the first step:

diff --git a/include/uapi/sound/compress_params.h 
b/include/uapi/sound/compress_params.h
index ddc77322d571..c664d15410eb 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -347,6 +347,8 @@ union snd_codec_options {
   * @modes: Supported modes. See SND_AUDIOMODE defines
   * @formats: Supported formats. See SND_AUDIOSTREAMFORMAT defines
   * @min_buffer: Minimum buffer size handled by codec implementation
+ * @pcm_formats: Output (for decoders) or input (for encoders)
+ *               PCM formats (required to accel mode, 0 for other modes)
   * @reserved: reserved for future use
   *
   * This structure provides a scalar value for profiles, modes and stream
@@ -370,7 +372,8 @@ struct snd_codec_desc {
         __u32 modes;
         __u32 formats;
         __u32 min_buffer;
-       __u32 reserved[15];
+       __u32 pcm_formats;
+       __u32 reserved[14];
  } __attribute__((packed, aligned(4)));

  /** struct snd_codec
@@ -395,6 +398,8 @@ struct snd_codec_desc {
   * @align: Block alignment in bytes of an audio sample.
   *             Only required for PCM or IEC formats.
   * @options: encoder-specific settings
+ * @pcm_format: Output (for decoders) or input (for encoders)
+ *               PCM formats (required to accel mode, 0 for other modes)
   * @reserved: reserved for future use
   */

@@ -411,7 +416,8 @@ struct snd_codec {
         __u32 format;
         __u32 align;
         union snd_codec_options options;
-       __u32 reserved[3];
+       __u32 pcm_format;
+       __u32 reserved[2];
  } __attribute__((packed, aligned(4)));

  #endif

Then the SRC extension may be like:

diff --git a/include/uapi/sound/compress_params.h 
b/include/uapi/sound/compress_params.h
index c664d15410eb..5d51ecba6d55 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -334,6 +334,14 @@ union snd_codec_options {
  	struct snd_dec_wma wma_d;
  	struct snd_dec_alac alac_d;
  	struct snd_dec_ape ape_d;
+	struct {
+		__u32 out_sample_rate;
+	} src_d;
+} __attribute__((packed, aligned(4)));
+
+struct snd_codec_desc_src {
+	__u32 out_sample_rate_min;
+	__u32 out_sample_rate_max;
  } __attribute__((packed, aligned(4)));

  /** struct snd_codec_desc - description of codec capabilities
@@ -349,6 +357,7 @@ union snd_codec_options {
   * @min_buffer: Minimum buffer size handled by codec implementation
   * @pcm_formats: Output (for decoders) or input (for encoders)
   *               PCM formats (required to accel mode, 0 for other modes)
+ * @u_space: union space (for codec dependent date)
   * @reserved: reserved for future use
   *
   * This structure provides a scalar value for profiles, modes and stream
@@ -373,7 +382,11 @@ struct snd_codec_desc {
  	__u32 formats;
  	__u32 min_buffer;
  	__u32 pcm_formats;
-	__u32 reserved[14];
+	union {
+		__u32 u_space[6];
+		struct snd_codec_desc_src src;
+	} __attribute__((packed, aligned(4)));
+	__u32 reserved[8];
  } __attribute__((packed, aligned(4)));

  /** struct snd_codec

This will allow the output rate to be negotiated between user space and the 
kernel driver. Eventually we can use a rate bitmap to be more precise in "struct 
snd_codec_desc_src" (or a combination of range/bitmap).

						Jaroslav
diff mbox series

Patch

diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 98772b0cbcb7..8b2b72f94e26 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -112,10 +112,12 @@  struct snd_compr_codec_caps {
  * end of the track
  * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
  * beginning of the track
+ * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
  */
 enum sndrv_compress_encoder {
 	SNDRV_COMPRESS_ENCODER_PADDING = 1,
 	SNDRV_COMPRESS_ENCODER_DELAY = 2,
+	SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
 };
 
 /**
diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index ddc77322d571..0843773ea6b4 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -43,7 +43,8 @@ 
 #define SND_AUDIOCODEC_BESPOKE               ((__u32) 0x0000000E)
 #define SND_AUDIOCODEC_ALAC                  ((__u32) 0x0000000F)
 #define SND_AUDIOCODEC_APE                   ((__u32) 0x00000010)
-#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_APE
+#define SND_AUDIOCODEC_SRC                   ((__u32) 0x00000011)
+#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_SRC
 
 /*
  * Profile and modes are listed with bit masks. This allows for a
@@ -324,6 +325,11 @@  struct snd_dec_ape {
 	__u32 seek_table_present;
 } __attribute__((packed, aligned(4)));
 
+struct snd_dec_src {
+	__u32 format_out;
+	__u32 rate_out;
+} __attribute__((packed, aligned(4)));
+
 union snd_codec_options {
 	struct snd_enc_wma wma;
 	struct snd_enc_vorbis vorbis;
@@ -334,6 +340,7 @@  union snd_codec_options {
 	struct snd_dec_wma wma_d;
 	struct snd_dec_alac alac_d;
 	struct snd_dec_ape ape_d;
+	struct snd_dec_src src;
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec_desc - description of codec capabilities