Message ID | 1722940003-20126-2-git-send-email-shengjiu.wang@nxp.com (mailing list archive) |
---|---|
State | Superseded |
Series | ASoC: fsl: add memory to memory function for ASRC |
On 8/6/24 12:26, Shengjiu Wang wrote: > Add Sample Rate Converter(SRC) codec support, define the output > format and rate for SRC. > > Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com> > --- > include/uapi/sound/compress_offload.h | 2 ++ > include/uapi/sound/compress_params.h | 9 ++++++++- > 2 files changed, 10 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h > index 98772b0cbcb7..8b2b72f94e26 100644 > --- a/include/uapi/sound/compress_offload.h > +++ b/include/uapi/sound/compress_offload.h > @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { > * end of the track > * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the > * beginning of the track > + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter > */ > enum sndrv_compress_encoder { > SNDRV_COMPRESS_ENCODER_PADDING = 1, > SNDRV_COMPRESS_ENCODER_DELAY = 2, > + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, > }; this sounds wrong to me. The sample rate converter is not an "encoder", and the properties for padding/delay are totally specific to an encoder function. The other point is that I am not sure how standard this ratio_mod parameter is. This could be totally specific to a specific implementation, and another ASRC might have different parameters. > > /** > diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h > index ddc77322d571..0843773ea6b4 100644 > --- a/include/uapi/sound/compress_params.h > +++ b/include/uapi/sound/compress_params.h > @@ -43,7 +43,8 @@ > #define SND_AUDIOCODEC_BESPOKE ((__u32) 0x0000000E) > #define SND_AUDIOCODEC_ALAC ((__u32) 0x0000000F) > #define SND_AUDIOCODEC_APE ((__u32) 0x00000010) > -#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_APE > +#define SND_AUDIOCODEC_SRC ((__u32) 0x00000011) > +#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_SRC I am not sure this is wise to change such definitions? > > /* > * Profile and modes are listed with bit masks. This allows for a > @@ -324,6 +325,11 @@ struct snd_dec_ape { > __u32 seek_table_present; > } __attribute__((packed, aligned(4))); > > +struct snd_dec_src { > + __u32 format_out; > + __u32 rate_out; > +} __attribute__((packed, aligned(4))); Again I am not sure how standard those parameters are, and even if they were if their representation is reusable. And the compressed API has a good split between encoders and decoders, I am not sure how an SRC can be classified as either. > union snd_codec_options { > struct snd_enc_wma wma; > struct snd_enc_vorbis vorbis; > @@ -334,6 +340,7 @@ union snd_codec_options { > struct snd_dec_wma wma_d; > struct snd_dec_alac alac_d; > struct snd_dec_ape ape_d; > + struct snd_dec_src src; > } __attribute__((packed, aligned(4))); > > /** struct snd_codec_desc - description of codec capabilities
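For context, a minimal user-space sketch of how the proposed SND_AUDIOCODEC_SRC type and snd_dec_src options from the patch above might be configured. SNDRV_COMPRESS_SET_PARAMS, struct snd_compr_params and struct snd_codec are existing compress-offload UAPI; SND_AUDIOCODEC_SRC and options.src.{format_out,rate_out} exist only in this series, and the device path and numeric values are illustrative, not from the patch:

/*
 * Hedged sketch: configure the proposed SRC "codec" from user space.
 * Only the SRC-specific names are new; everything else is current UAPI.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sound/asound.h>
#include <sound/compress_params.h>
#include <sound/compress_offload.h>

static int open_src_stream(const char *dev)
{
	struct snd_compr_params params;
	int fd = open(dev, O_WRONLY);		/* e.g. /dev/snd/comprC1D0 */

	if (fd < 0)
		return -1;

	memset(&params, 0, sizeof(params));
	params.buffer.fragment_size = 4096;
	params.buffer.fragments = 8;

	params.codec.id = SND_AUDIOCODEC_SRC;		/* proposed in this patch */
	params.codec.ch_in = 2;				/* channels reused from snd_codec */
	params.codec.sample_rate = 48000;		/* input rate */
	params.codec.format = SNDRV_PCM_FORMAT_S16_LE;	/* input format */
	params.codec.options.src.format_out = SNDRV_PCM_FORMAT_S24_LE;	/* proposed */
	params.codec.options.src.rate_out = 44100;			/* proposed */

	if (ioctl(fd, SNDRV_COMPRESS_SET_PARAMS, &params) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}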
All- "The sample rate converter is not an encoder ..." Indeed, an encoder creates a compressed bitstream from audio data (typically linear PCM samples), normally for transmission of some type. A sample rate converter generates audio data from audio data, and is normally applied prior to an encoder because it can only accept a limited range of sample rates. -Jeff Quoting Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>: > On 8/6/24 12:26, Shengjiu Wang wrote: >> Add Sample Rate Converter(SRC) codec support, define the output >> format and rate for SRC. >> >> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com> >> --- >> include/uapi/sound/compress_offload.h | 2 ++ >> include/uapi/sound/compress_params.h | 9 ++++++++- >> 2 files changed, 10 insertions(+), 1 deletion(-) >> >> diff --git a/include/uapi/sound/compress_offload.h >> b/include/uapi/sound/compress_offload.h >> index 98772b0cbcb7..8b2b72f94e26 100644 >> --- a/include/uapi/sound/compress_offload.h >> +++ b/include/uapi/sound/compress_offload.h >> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { >> * end of the track >> * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the >> encoder at the >> * beginning of the track >> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for >> sample rate converter >> */ >> enum sndrv_compress_encoder { >> SNDRV_COMPRESS_ENCODER_PADDING = 1, >> SNDRV_COMPRESS_ENCODER_DELAY = 2, >> + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, >> }; > > this sounds wrong to me. The sample rate converter is not an "encoder", > and the properties for padding/delay are totally specific to an encoder > function. > > The other point is that I am not sure how standard this ratio_mod > parameter is. This could be totally specific to a specific > implementation, and another ASRC might have different parameters. > >> >> /** >> diff --git a/include/uapi/sound/compress_params.h >> b/include/uapi/sound/compress_params.h >> index ddc77322d571..0843773ea6b4 100644 >> --- a/include/uapi/sound/compress_params.h >> +++ b/include/uapi/sound/compress_params.h >> @@ -43,7 +43,8 @@ >> #define SND_AUDIOCODEC_BESPOKE ((__u32) 0x0000000E) >> #define SND_AUDIOCODEC_ALAC ((__u32) 0x0000000F) >> #define SND_AUDIOCODEC_APE ((__u32) 0x00000010) >> -#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_APE >> +#define SND_AUDIOCODEC_SRC ((__u32) 0x00000011) >> +#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_SRC > > I am not sure this is wise to change such definitions? >> >> /* >> * Profile and modes are listed with bit masks. This allows for a >> @@ -324,6 +325,11 @@ struct snd_dec_ape { >> __u32 seek_table_present; >> } __attribute__((packed, aligned(4))); >> >> +struct snd_dec_src { >> + __u32 format_out; >> + __u32 rate_out; >> +} __attribute__((packed, aligned(4))); > > Again I am not sure how standard those parameters are, and even if they > were if their representation is reusable. > > And the compressed API has a good split between encoders and decoders, I > am not sure how an SRC can be classified as either. > >> union snd_codec_options { >> struct snd_enc_wma wma; >> struct snd_enc_vorbis vorbis; >> @@ -334,6 +340,7 @@ union snd_codec_options { >> struct snd_dec_wma wma_d; >> struct snd_dec_alac alac_d; >> struct snd_dec_ape ape_d; >> + struct snd_dec_src src; >> } __attribute__((packed, aligned(4))); >> >> /** struct snd_codec_desc - description of codec capabilities
On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> wrote: > > > > On 8/6/24 12:26, Shengjiu Wang wrote: > > Add Sample Rate Converter(SRC) codec support, define the output > > format and rate for SRC. > > > > Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com> > > --- > > include/uapi/sound/compress_offload.h | 2 ++ > > include/uapi/sound/compress_params.h | 9 ++++++++- > > 2 files changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h > > index 98772b0cbcb7..8b2b72f94e26 100644 > > --- a/include/uapi/sound/compress_offload.h > > +++ b/include/uapi/sound/compress_offload.h > > @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { > > * end of the track > > * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the > > * beginning of the track > > + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter > > */ > > enum sndrv_compress_encoder { > > SNDRV_COMPRESS_ENCODER_PADDING = 1, > > SNDRV_COMPRESS_ENCODER_DELAY = 2, > > + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, > > }; > > this sounds wrong to me. The sample rate converter is not an "encoder", > and the properties for padding/delay are totally specific to an encoder > function. There is only decoder and encoder definition for compress, I know it is difficult to add SRC to encoder or decoder classification, SRC is a Post Processing. I hope you can have a recommandation. Thanks. Best regards Shengjiu Wang > > The other point is that I am not sure how standard this ratio_mod > parameter is. This could be totally specific to a specific > implementation, and another ASRC might have different parameters. > > > > > /** > > diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h > > index ddc77322d571..0843773ea6b4 100644 > > --- a/include/uapi/sound/compress_params.h > > +++ b/include/uapi/sound/compress_params.h > > @@ -43,7 +43,8 @@ > > #define SND_AUDIOCODEC_BESPOKE ((__u32) 0x0000000E) > > #define SND_AUDIOCODEC_ALAC ((__u32) 0x0000000F) > > #define SND_AUDIOCODEC_APE ((__u32) 0x00000010) > > -#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_APE > > +#define SND_AUDIOCODEC_SRC ((__u32) 0x00000011) > > +#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_SRC > > I am not sure this is wise to change such definitions? > > > > /* > > * Profile and modes are listed with bit masks. This allows for a > > @@ -324,6 +325,11 @@ struct snd_dec_ape { > > __u32 seek_table_present; > > } __attribute__((packed, aligned(4))); > > > > +struct snd_dec_src { > > + __u32 format_out; > > + __u32 rate_out; > > +} __attribute__((packed, aligned(4))); > > Again I am not sure how standard those parameters are, and even if they > were if their representation is reusable. > > And the compressed API has a good split between encoders and decoders, I > am not sure how an SRC can be classified as either. > > > union snd_codec_options { > > struct snd_enc_wma wma; > > struct snd_enc_vorbis vorbis; > > @@ -334,6 +340,7 @@ union snd_codec_options { > > struct snd_dec_wma wma_d; > > struct snd_dec_alac alac_d; > > struct snd_dec_ape ape_d; > > + struct snd_dec_src src; > > } __attribute__((packed, aligned(4))); > > > > /** struct snd_codec_desc - description of codec capabilities >
On 8/8/24 11:17, Shengjiu Wang wrote: > On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart > <pierre-louis.bossart@linux.intel.com> wrote: >> >> >> >> On 8/6/24 12:26, Shengjiu Wang wrote: >>> Add Sample Rate Converter(SRC) codec support, define the output >>> format and rate for SRC. >>> >>> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com> >>> --- >>> include/uapi/sound/compress_offload.h | 2 ++ >>> include/uapi/sound/compress_params.h | 9 ++++++++- >>> 2 files changed, 10 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h >>> index 98772b0cbcb7..8b2b72f94e26 100644 >>> --- a/include/uapi/sound/compress_offload.h >>> +++ b/include/uapi/sound/compress_offload.h >>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { >>> * end of the track >>> * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the >>> * beginning of the track >>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter >>> */ >>> enum sndrv_compress_encoder { >>> SNDRV_COMPRESS_ENCODER_PADDING = 1, >>> SNDRV_COMPRESS_ENCODER_DELAY = 2, >>> + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, >>> }; >> >> this sounds wrong to me. The sample rate converter is not an "encoder", >> and the properties for padding/delay are totally specific to an encoder >> function. > > There is only decoder and encoder definition for compress, I know > it is difficult to add SRC to encoder or decoder classification, > SRC is a Post Processing. I hope you can have a recommandation I don't. I think we're blurring layers in a really odd way. The main reason why the compress API was added is to remove the byte-to-time conversions. But that's clearly not useful for a post-processing of PCM data, where the bitrate is constant. It really feels like we're adding this memory-to-memory API to the compress framework because we don't have anything else available, not because it makes sense to do so. Then there's the issue of parameters, we chose to only add parameters for standard encoders/decoders. Post-processing is highly specific and the parameter definitions varies from one implementation to another - and usually parameters are handled in an opaque way with binary controls. This is best handled with a UUID that needs to be known only to applications and low-level firmware/hardware, the kernel code should not have to be modified for each and every processing and to add new parameters. It just does not scale and it's unmaintainable. At the very least if you really want to use this compress API, extend it to use a non-descript "UUID-defined" type and an opaque set of parameters with this UUID passed in a header.
On 08. 08. 24 13:27, Pierre-Louis Bossart wrote: > > > On 8/8/24 11:17, Shengjiu Wang wrote: >> On Tue, Aug 6, 2024 at 7:25 PM Pierre-Louis Bossart >> <pierre-louis.bossart@linux.intel.com> wrote: >>> >>> >>> >>> On 8/6/24 12:26, Shengjiu Wang wrote: >>>> Add Sample Rate Converter(SRC) codec support, define the output >>>> format and rate for SRC. >>>> >>>> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com> >>>> --- >>>> include/uapi/sound/compress_offload.h | 2 ++ >>>> include/uapi/sound/compress_params.h | 9 ++++++++- >>>> 2 files changed, 10 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h >>>> index 98772b0cbcb7..8b2b72f94e26 100644 >>>> --- a/include/uapi/sound/compress_offload.h >>>> +++ b/include/uapi/sound/compress_offload.h >>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { >>>> * end of the track >>>> * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the >>>> * beginning of the track >>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter >>>> */ >>>> enum sndrv_compress_encoder { >>>> SNDRV_COMPRESS_ENCODER_PADDING = 1, >>>> SNDRV_COMPRESS_ENCODER_DELAY = 2, >>>> + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, >>>> }; >>> >>> this sounds wrong to me. The sample rate converter is not an "encoder", >>> and the properties for padding/delay are totally specific to an encoder >>> function. >> >> There is only decoder and encoder definition for compress, I know >> it is difficult to add SRC to encoder or decoder classification, >> SRC is a Post Processing. I hope you can have a recommandation > > I don't. I think we're blurring layers in a really odd way. > > The main reason why the compress API was added is to remove the > byte-to-time conversions. But that's clearly not useful for a > post-processing of PCM data, where the bitrate is constant. It really > feels like we're adding this memory-to-memory API to the compress > framework because we don't have anything else available, not because it > makes sense to do so. It makes sense to offload decoder/encoder tasks as batch processing for standard compress stream and return back result (PCM stream or encoded stream) to user space. So it makes sense to use the compress interface (parameters handshake) for it. Let's talk about the proper SRC extension. For SRC and dynamic rate modification. I would just create an ALSA control for this. We are already using the "PCM Rate Shift 100000" control in the sound/drivers/aloop.c for this purpose (something like pitch in MIDI) for example. There is no requirement to add this function through metadata ioctls. As bonus, this control can be monitored with multiple tasks. > Then there's the issue of parameters, we chose to only add parameters > for standard encoders/decoders. Post-processing is highly specific and > the parameter definitions varies from one implementation to another - > and usually parameters are handled in an opaque way with binary > controls. This is best handled with a UUID that needs to be known only > to applications and low-level firmware/hardware, the kernel code should > not have to be modified for each and every processing and to add new > parameters. It just does not scale and it's unmaintainable. > > At the very least if you really want to use this compress API, extend it > to use a non-descript "UUID-defined" type and an opaque set of > parameters with this UUID passed in a header. 
We don't need to use a UUID-defined scheme for a simple (A)SRC implementation. As I noted, the specific runtime controls may use the existing ALSA control API. Jaroslav
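For reference, a minimal kernel-side sketch of the kind of control Jaroslav points to, modeled on the "PCM Rate Shift 100000" control in sound/drivers/aloop.c (100000 means no shift). The struct my_asrc state, the callbacks and the exact range are illustrative assumptions, not code from any existing driver:

#include <sound/core.h>
#include <sound/control.h>

struct my_asrc {			/* hypothetical driver state */
	unsigned int rate_shift;	/* 100000 == nominal rate */
};

static int rate_shift_info(struct snd_kcontrol *kcontrol,
			   struct snd_ctl_elem_info *uinfo)
{
	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
	uinfo->count = 1;
	uinfo->value.integer.min = 80000;	/* illustrative range, aloop-style */
	uinfo->value.integer.max = 120000;
	uinfo->value.integer.step = 1;
	return 0;
}

static int rate_shift_get(struct snd_kcontrol *kcontrol,
			  struct snd_ctl_elem_value *ucontrol)
{
	struct my_asrc *asrc = snd_kcontrol_chip(kcontrol);

	ucontrol->value.integer.value[0] = asrc->rate_shift;
	return 0;
}

static int rate_shift_put(struct snd_kcontrol *kcontrol,
			  struct snd_ctl_elem_value *ucontrol)
{
	struct my_asrc *asrc = snd_kcontrol_chip(kcontrol);
	unsigned int val = ucontrol->value.integer.value[0];
	int changed;

	if (val < 80000)
		val = 80000;
	if (val > 120000)
		val = 120000;
	changed = (val != asrc->rate_shift);
	asrc->rate_shift = val;		/* driver would reprogram the ratio here */
	return changed;
}

static const struct snd_kcontrol_new rate_shift_ctl = {
	.iface = SNDRV_CTL_ELEM_IFACE_PCM,	/* aloop binds it to the PCM device */
	.name = "PCM Rate Shift 100000",
	.info = rate_shift_info,
	.get = rate_shift_get,
	.put = rate_shift_put,
};

/* registered from probe(), e.g.: snd_ctl_add(card, snd_ctl_new1(&rate_shift_ctl, asrc)); */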
files changed, 10 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/ >>>>> sound/compress_offload.h >>>>> index 98772b0cbcb7..8b2b72f94e26 100644 >>>>> --- a/include/uapi/sound/compress_offload.h >>>>> +++ b/include/uapi/sound/compress_offload.h >>>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { >>>>> * end of the track >>>>> * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the >>>>> encoder at the >>>>> * beginning of the track >>>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for >>>>> sample rate converter >>>>> */ >>>>> enum sndrv_compress_encoder { >>>>> SNDRV_COMPRESS_ENCODER_PADDING = 1, >>>>> SNDRV_COMPRESS_ENCODER_DELAY = 2, >>>>> + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, >>>>> }; >>>> >>>> this sounds wrong to me. The sample rate converter is not an "encoder", >>>> and the properties for padding/delay are totally specific to an encoder >>>> function. >>> >>> There is only decoder and encoder definition for compress, I know >>> it is difficult to add SRC to encoder or decoder classification, >>> SRC is a Post Processing. I hope you can have a recommandation >> >> I don't. I think we're blurring layers in a really odd way. >> >> The main reason why the compress API was added is to remove the >> byte-to-time conversions. But that's clearly not useful for a >> post-processing of PCM data, where the bitrate is constant. It really >> feels like we're adding this memory-to-memory API to the compress >> framework because we don't have anything else available, not because it >> makes sense to do so. > > It makes sense to offload decoder/encoder tasks as batch processing for > standard compress stream and return back result (PCM stream or encoded > stream) to user space. So it makes sense to use the compress interface > (parameters handshake) for it. Let's talk about the proper SRC extension. > > For SRC and dynamic rate modification. I would just create an ALSA > control for this. We are already using the "PCM Rate Shift 100000" > control in the sound/drivers/aloop.c for this purpose (something like > pitch in MIDI) for example. There is no requirement to add this function > through metadata ioctls. As bonus, this control can be monitored with > multiple tasks. this wouldn't work when the rate is estimated in firmware/hardware, which is precisely what the 'asynchronous' part of ASRC does. >> Then there's the issue of parameters, we chose to only add parameters >> for standard encoders/decoders. Post-processing is highly specific and >> the parameter definitions varies from one implementation to another - >> and usually parameters are handled in an opaque way with binary >> controls. This is best handled with a UUID that needs to be known only >> to applications and low-level firmware/hardware, the kernel code should >> not have to be modified for each and every processing and to add new >> parameters. It just does not scale and it's unmaintainable. >> >> At the very least if you really want to use this compress API, extend it >> to use a non-descript "UUID-defined" type and an opaque set of >> parameters with this UUID passed in a header. > > We don't need to use UUID-defined scheme for simple (A)SRC > implementation. As I noted, the specific runtime controls may use > existing ALSA control API. "Simple (A)SRC" is an oxymoron. There are multiple ways to define the performance, and how the drift estimator is handled. There's nothing simple if you look under the hood. 
The SOF implementation has, for example, these parameters:

uint32_t source_rate;       /**< Define a fixed source rate, or use 0 to
                                 indicate that the rate must be taken from
                                 the stream */
uint32_t sink_rate;         /**< Define a fixed sink rate, or use 0 to
                                 indicate that the rate must be taken from
                                 the stream */
uint32_t asynchronous_mode; /**< synchronous 0, asynchronous 1.
                                 When 1 the ASRC tracks and compensates
                                 for drift. */
uint32_t operation_mode;    /**< push 0, pull 1. In push mode the ASRC
                                 consumes a defined number of frames at
                                 the input, with a varying number of
                                 frames at the output. In pull mode the
                                 ASRC outputs a defined number of frames
                                 while the number of input frames varies. */

They are clearly different from what is suggested above with a 'ratio-mod'.

Same if you have a 'simple EQ': there are dozens of ways to implement the functionality with FIR, IIR or a combination of the two, and multiple bands.

The point is that you have to think upfront about a generic way to pass parameters. We didn't have to do it for encoders/decoders because we only catered to well-documented standard solutions. By choosing to support PCM processing, a new can of worms is now open.

I repeat: please do not make the mistake of listing all processing with an enum and a new structure for parameters every time someone needs a specific transform in their pipeline. We made that mistake with SOF and had to backtrack rather quickly. The only way to scale is an identifier that is NOT included in the kernel code but is known to the higher and lower levels only.
On 08. 08. 24 14:19, Pierre-Louis Bossart wrote: > files changed, 10 insertions(+), 1 deletion(-) >>>>>> >>>>>> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/ >>>>>> sound/compress_offload.h >>>>>> index 98772b0cbcb7..8b2b72f94e26 100644 >>>>>> --- a/include/uapi/sound/compress_offload.h >>>>>> +++ b/include/uapi/sound/compress_offload.h >>>>>> @@ -112,10 +112,12 @@ struct snd_compr_codec_caps { >>>>>> * end of the track >>>>>> * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the >>>>>> encoder at the >>>>>> * beginning of the track >>>>>> + * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for >>>>>> sample rate converter >>>>>> */ >>>>>> enum sndrv_compress_encoder { >>>>>> SNDRV_COMPRESS_ENCODER_PADDING = 1, >>>>>> SNDRV_COMPRESS_ENCODER_DELAY = 2, >>>>>> + SNDRV_COMPRESS_SRC_RATIO_MOD = 3, >>>>>> }; >>>>> >>>>> this sounds wrong to me. The sample rate converter is not an "encoder", >>>>> and the properties for padding/delay are totally specific to an encoder >>>>> function. >>>> >>>> There is only decoder and encoder definition for compress, I know >>>> it is difficult to add SRC to encoder or decoder classification, >>>> SRC is a Post Processing. I hope you can have a recommandation >>> >>> I don't. I think we're blurring layers in a really odd way. >>> >>> The main reason why the compress API was added is to remove the >>> byte-to-time conversions. But that's clearly not useful for a >>> post-processing of PCM data, where the bitrate is constant. It really >>> feels like we're adding this memory-to-memory API to the compress >>> framework because we don't have anything else available, not because it >>> makes sense to do so. >> >> It makes sense to offload decoder/encoder tasks as batch processing for >> standard compress stream and return back result (PCM stream or encoded >> stream) to user space. So it makes sense to use the compress interface >> (parameters handshake) for it. Let's talk about the proper SRC extension. >> >> For SRC and dynamic rate modification. I would just create an ALSA >> control for this. We are already using the "PCM Rate Shift 100000" >> control in the sound/drivers/aloop.c for this purpose (something like >> pitch in MIDI) for example. There is no requirement to add this function >> through metadata ioctls. As bonus, this control can be monitored with >> multiple tasks. > > this wouldn't work when the rate is estimated in firmware/hardware, > which is precisely what the 'asynchronous' part of ASRC does. > > >>> Then there's the issue of parameters, we chose to only add parameters >>> for standard encoders/decoders. Post-processing is highly specific and >>> the parameter definitions varies from one implementation to another - >>> and usually parameters are handled in an opaque way with binary >>> controls. This is best handled with a UUID that needs to be known only >>> to applications and low-level firmware/hardware, the kernel code should >>> not have to be modified for each and every processing and to add new >>> parameters. It just does not scale and it's unmaintainable. >>> >>> At the very least if you really want to use this compress API, extend it >>> to use a non-descript "UUID-defined" type and an opaque set of >>> parameters with this UUID passed in a header. >> >> We don't need to use UUID-defined scheme for simple (A)SRC >> implementation. As I noted, the specific runtime controls may use >> existing ALSA control API. > > "Simple (A)SRC" is an oxymoron. 
There are multiple ways to define the > performance, and how the drift estimator is handled. There's nothing > simple if you look under the hood. The SOF implementation has for > example those parameters: > > uint32_t source_rate; /**< Define fixed source rate or */ > /**< use 0 to indicate need to get */ > /**< the rate from stream */ > uint32_t sink_rate; /**< Define fixed sink rate or */ > /**< use 0 to indicate need to get */ > /**< the rate from stream */ > uint32_t asynchronous_mode; /**< synchronous 0, asynchronous 1 */ > /**< When 1 the ASRC tracks and */ > /**< compensates for drift. */ > uint32_t operation_mode; /**< push 0, pull 1, In push mode the */ > /**< ASRC consumes a defined number */ > /**< of frames at input, with varying */ > /**< number of frames at output. */ > /**< In pull mode the ASRC outputs */ > /**< a defined number of frames while */ > /**< number of input frames varies. */ > > They are clearly different from what is suggested above with a 'ratio-mod'. I don't think so. The proposed (A)SRC for compress-accel is just one case for the above configs where the input is known and output is controlled by the requested rate. The I/O mechanism is abstracted enough in this case and the driver/hardware/firmware must follow it. > Same if you have a 'simple EQ'. there are dozens of ways to implement > the functionality with FIR, IIR or a combination of the two, and > multiple bands. > > The point is that you have to think upfront about a generic way to pass > parameters. We didn't have to do it for encoders/decoders because we > only catered to well-documented standard solutions only. By choosing to > support PCM processing, a new can of worms is now open. > > I repeat: please do not make the mistake of listing all processing with > an enum and a new structure for parameters every time someone needs a > specific transform in their pipeline. We made that mistake with SOF and > had to backtrack rather quickly. The only way to scale is an identifier > that is NOT included in the kernel code but is known to higher and > lower-levels only. There are two ways - black box (UUID - as you suggested) - or well defined purpose (abstraction). For your example 'simple EQ', the parameters should be the band (frequency range) volume values. It's abstract and the real filters (resp. implementation) used behind may depend on the hardware/driver capabilities. From my view, the really special cases may be handled as black box, but others like (A)SRC should follow some well-defined abstraction IMHO to not force user space to handle all special cases. Jaroslav
>>>> Then there's the issue of parameters, we chose to only add parameters >>>> for standard encoders/decoders. Post-processing is highly specific and >>>> the parameter definitions varies from one implementation to another - >>>> and usually parameters are handled in an opaque way with binary >>>> controls. This is best handled with a UUID that needs to be known only >>>> to applications and low-level firmware/hardware, the kernel code should >>>> not have to be modified for each and every processing and to add new >>>> parameters. It just does not scale and it's unmaintainable. >>>> >>>> At the very least if you really want to use this compress API, >>>> extend it >>>> to use a non-descript "UUID-defined" type and an opaque set of >>>> parameters with this UUID passed in a header. >>> >>> We don't need to use UUID-defined scheme for simple (A)SRC >>> implementation. As I noted, the specific runtime controls may use >>> existing ALSA control API. >> >> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the >> performance, and how the drift estimator is handled. There's nothing >> simple if you look under the hood. The SOF implementation has for >> example those parameters: >> >> uint32_t source_rate; /**< Define fixed source rate or */ >> /**< use 0 to indicate need to get */ >> /**< the rate from stream */ >> uint32_t sink_rate; /**< Define fixed sink rate or */ >> /**< use 0 to indicate need to get */ >> /**< the rate from stream */ >> uint32_t asynchronous_mode; /**< synchronous 0, asynchronous 1 */ >> /**< When 1 the ASRC tracks and */ >> /**< compensates for drift. */ >> uint32_t operation_mode; /**< push 0, pull 1, In push mode the */ >> /**< ASRC consumes a defined number */ >> /**< of frames at input, with varying */ >> /**< number of frames at output. */ >> /**< In pull mode the ASRC outputs */ >> /**< a defined number of frames while */ >> /**< number of input frames varies. */ >> >> They are clearly different from what is suggested above with a 'ratio- >> mod'. > > I don't think so. The proposed (A)SRC for compress-accel is just one > case for the above configs where the input is known and output is > controlled by the requested rate. The I/O mechanism is abstracted enough > in this case and the driver/hardware/firmware must follow it. ASRC is usually added when the nominal rates are known but the clock sources differ and the drift needs to be estimated at run-time and the coefficients or interpolation modified dynamically If the ratio is known exactly and there's no clock drift, then it's a different problem where the filter coefficients are constant. >> Same if you have a 'simple EQ'. there are dozens of ways to implement >> the functionality with FIR, IIR or a combination of the two, and >> multiple bands. >> >> The point is that you have to think upfront about a generic way to pass >> parameters. We didn't have to do it for encoders/decoders because we >> only catered to well-documented standard solutions only. By choosing to >> support PCM processing, a new can of worms is now open. >> >> I repeat: please do not make the mistake of listing all processing with >> an enum and a new structure for parameters every time someone needs a >> specific transform in their pipeline. We made that mistake with SOF and >> had to backtrack rather quickly. The only way to scale is an identifier >> that is NOT included in the kernel code but is known to higher and >> lower-levels only. 
> > There are two ways - black box (UUID - as you suggested) - or well > defined purpose (abstraction). For your example 'simple EQ', the > parameters should be the band (frequency range) volume values. It's > abstract and the real filters (resp. implementation) used behind may > depend on the hardware/driver capabilities. Indeed there is a possibility that the parameters are high-level, but that would require firmware or hardware to be able to generate actual coefficients from those parameters. That usually requires some advanced math which isn't necessarily obvious to implement with fixed-point hardware. > From my view, the really special cases may be handled as black box, but > others like (A)SRC should follow some well-defined abstraction IMHO to > not force user space to handle all special cases. I am not against the high-level abstractions, e.g. along the lines of what Android defined: https://developer.android.com/reference/android/media/audiofx/AudioEffect That's not sufficient however, we also need to make sure there's an ability to provide pre-computed coefficients in an opaque manner for processing that doesn't fit in the well-defined cases. In practice there are very few 3rd party IP that fits in well-defined cases, everyone has secret-sauce parameters and options.
On Fri, Aug 9, 2024 at 3:25 PM Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> wrote: > > > >>>> Then there's the issue of parameters, we chose to only add parameters > >>>> for standard encoders/decoders. Post-processing is highly specific and > >>>> the parameter definitions varies from one implementation to another - > >>>> and usually parameters are handled in an opaque way with binary > >>>> controls. This is best handled with a UUID that needs to be known only > >>>> to applications and low-level firmware/hardware, the kernel code should > >>>> not have to be modified for each and every processing and to add new > >>>> parameters. It just does not scale and it's unmaintainable. > >>>> > >>>> At the very least if you really want to use this compress API, > >>>> extend it > >>>> to use a non-descript "UUID-defined" type and an opaque set of > >>>> parameters with this UUID passed in a header. > >>> > >>> We don't need to use UUID-defined scheme for simple (A)SRC > >>> implementation. As I noted, the specific runtime controls may use > >>> existing ALSA control API. > >> > >> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the > >> performance, and how the drift estimator is handled. There's nothing > >> simple if you look under the hood. The SOF implementation has for > >> example those parameters: > >> > >> uint32_t source_rate; /**< Define fixed source rate or */ > >> /**< use 0 to indicate need to get */ > >> /**< the rate from stream */ > >> uint32_t sink_rate; /**< Define fixed sink rate or */ > >> /**< use 0 to indicate need to get */ > >> /**< the rate from stream */ > >> uint32_t asynchronous_mode; /**< synchronous 0, asynchronous 1 */ > >> /**< When 1 the ASRC tracks and */ > >> /**< compensates for drift. */ > >> uint32_t operation_mode; /**< push 0, pull 1, In push mode the */ > >> /**< ASRC consumes a defined number */ > >> /**< of frames at input, with varying */ > >> /**< number of frames at output. */ > >> /**< In pull mode the ASRC outputs */ > >> /**< a defined number of frames while */ > >> /**< number of input frames varies. */ > >> > >> They are clearly different from what is suggested above with a 'ratio- > >> mod'. > > > > I don't think so. The proposed (A)SRC for compress-accel is just one > > case for the above configs where the input is known and output is > > controlled by the requested rate. The I/O mechanism is abstracted enough > > in this case and the driver/hardware/firmware must follow it. > > ASRC is usually added when the nominal rates are known but the clock > sources differ and the drift needs to be estimated at run-time and the > coefficients or interpolation modified dynamically > > If the ratio is known exactly and there's no clock drift, then it's a > different problem where the filter coefficients are constant. > > >> Same if you have a 'simple EQ'. there are dozens of ways to implement > >> the functionality with FIR, IIR or a combination of the two, and > >> multiple bands. > >> > >> The point is that you have to think upfront about a generic way to pass > >> parameters. We didn't have to do it for encoders/decoders because we > >> only catered to well-documented standard solutions only. By choosing to > >> support PCM processing, a new can of worms is now open. > >> > >> I repeat: please do not make the mistake of listing all processing with > >> an enum and a new structure for parameters every time someone needs a > >> specific transform in their pipeline. 
We made that mistake with SOF and > >> had to backtrack rather quickly. The only way to scale is an identifier > >> that is NOT included in the kernel code but is known to higher and > >> lower-levels only. > > > > There are two ways - black box (UUID - as you suggested) - or well > > defined purpose (abstraction). For your example 'simple EQ', the > > parameters should be the band (frequency range) volume values. It's > > abstract and the real filters (resp. implementation) used behind may > > depend on the hardware/driver capabilities. > > Indeed there is a possibility that the parameters are high-level, but > that would require firmware or hardware to be able to generate actual > coefficients from those parameters. That usually requires some advanced > math which isn't necessarily obvious to implement with fixed-point hardware. > > > From my view, the really special cases may be handled as black box, but > > others like (A)SRC should follow some well-defined abstraction IMHO to > > not force user space to handle all special cases. > > I am not against the high-level abstractions, e.g. along the lines of > what Android defined: > https://developer.android.com/reference/android/media/audiofx/AudioEffect > > That's not sufficient however, we also need to make sure there's an > ability to provide pre-computed coefficients in an opaque manner for > processing that doesn't fit in the well-defined cases. In practice there > are very few 3rd party IP that fits in well-defined cases, everyone has > secret-sauce parameters and options.

I appreciate the discussion. Let me explain the reason for the change.

I use the metadata ioctl because ALSA controls are bound to the sound card, while what I need is controls bound to the snd_compr_stream: the ASRC compress sound card supports multiple instances (the ASRC hardware can run several conversions in parallel). ALSA controls can't cover that case, so the only choice in the current compress API is the metadata ioctl. The metadata ioctl can also be called many times, which meets the ratio-modifier requirement (the ratio may drift on the fly).

The compress API uses the codec as the unit for capability query and parameter setting, so I think I need to define 'SND_AUDIOCODEC_SRC' and 'struct snd_dec_src'. 'snd_dec_src' only defines the output format and output rate; the channel definition just reuses snd_codec.ch_in.

I understand your concern, but there seems to be no better option. If you have one, please guide me. Thanks.

Best regards Shengjiu Wang
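For reference, a minimal user-space sketch of the mechanism described above. struct snd_compr_metadata and SNDRV_COMPRESS_SET_METADATA already exist in the compress-offload UAPI; SNDRV_COMPRESS_SRC_RATIO_MOD and the encoding of the ratio value are only what this series proposes:

#include <sys/ioctl.h>
#include <sound/compress_offload.h>

/* Sketch: adjust the resampling ratio on an already-open compress stream fd. */
static int src_set_ratio_mod(int fd, unsigned int ratio_mod)
{
	struct snd_compr_metadata metadata = {
		.key = SNDRV_COMPRESS_SRC_RATIO_MOD,	/* proposed in this series */
	};

	metadata.value[0] = ratio_mod;	/* meaning defined by the driver */
	/* can be called repeatedly while the stream runs, to track drift */
	return ioctl(fd, SNDRV_COMPRESS_SET_METADATA, &metadata);
}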
> Why I use the metadata ioctl is because the ALSA controls are binding > to the sound card. What I want is the controls can be bound to > snd_compr_stream, because the ASRC compress sound card can > support multi instances ( the ASRC can support multi conversion in > parallel). The ALSA controls can't be used for this case, the only > choice in current compress API is metadata ioctl. I don't know if there is really a technical limitation for this; that is for Jaroslav to comment on. I am not sure why it would be a problem to e.g. have a volume control prior to an encoder or after a decoder. > And metadata > ioctl can be called many times which can meet the ratio modifier > requirement (ratio may be drift on the fly) Interesting, that's yet another way of handling the drift with userspace modifying the ratio dynamically. That's different to what I've seen before. > And compress API uses codec as the unit for capability query and > parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' > and 'struct snd_dec_src', for the 'snd_dec_src' just defined output > format and output rate, channels definition just reuse the snd_codec.ch_in. The capability query is an interesting point as well, it's not clear how to expose to userspace what this specific implementation can do, while at the same time *requiring* userspace to update the ratio dynamically. For something like this to work, userspace needs to have pre-existing information on how the SRC works.
On 09. 08. 24 12:14, Shengjiu Wang wrote: > On Fri, Aug 9, 2024 at 3:25 PM Pierre-Louis Bossart > <pierre-louis.bossart@linux.intel.com> wrote: >> >> >>>>>> Then there's the issue of parameters, we chose to only add parameters >>>>>> for standard encoders/decoders. Post-processing is highly specific and >>>>>> the parameter definitions varies from one implementation to another - >>>>>> and usually parameters are handled in an opaque way with binary >>>>>> controls. This is best handled with a UUID that needs to be known only >>>>>> to applications and low-level firmware/hardware, the kernel code should >>>>>> not have to be modified for each and every processing and to add new >>>>>> parameters. It just does not scale and it's unmaintainable. >>>>>> >>>>>> At the very least if you really want to use this compress API, >>>>>> extend it >>>>>> to use a non-descript "UUID-defined" type and an opaque set of >>>>>> parameters with this UUID passed in a header. >>>>> >>>>> We don't need to use UUID-defined scheme for simple (A)SRC >>>>> implementation. As I noted, the specific runtime controls may use >>>>> existing ALSA control API. >>>> >>>> "Simple (A)SRC" is an oxymoron. There are multiple ways to define the >>>> performance, and how the drift estimator is handled. There's nothing >>>> simple if you look under the hood. The SOF implementation has for >>>> example those parameters: >>>> >>>> uint32_t source_rate; /**< Define fixed source rate or */ >>>> /**< use 0 to indicate need to get */ >>>> /**< the rate from stream */ >>>> uint32_t sink_rate; /**< Define fixed sink rate or */ >>>> /**< use 0 to indicate need to get */ >>>> /**< the rate from stream */ >>>> uint32_t asynchronous_mode; /**< synchronous 0, asynchronous 1 */ >>>> /**< When 1 the ASRC tracks and */ >>>> /**< compensates for drift. */ >>>> uint32_t operation_mode; /**< push 0, pull 1, In push mode the */ >>>> /**< ASRC consumes a defined number */ >>>> /**< of frames at input, with varying */ >>>> /**< number of frames at output. */ >>>> /**< In pull mode the ASRC outputs */ >>>> /**< a defined number of frames while */ >>>> /**< number of input frames varies. */ >>>> >>>> They are clearly different from what is suggested above with a 'ratio- >>>> mod'. >>> >>> I don't think so. The proposed (A)SRC for compress-accel is just one >>> case for the above configs where the input is known and output is >>> controlled by the requested rate. The I/O mechanism is abstracted enough >>> in this case and the driver/hardware/firmware must follow it. >> >> ASRC is usually added when the nominal rates are known but the clock >> sources differ and the drift needs to be estimated at run-time and the >> coefficients or interpolation modified dynamically >> >> If the ratio is known exactly and there's no clock drift, then it's a >> different problem where the filter coefficients are constant. >> >>>> Same if you have a 'simple EQ'. there are dozens of ways to implement >>>> the functionality with FIR, IIR or a combination of the two, and >>>> multiple bands. >>>> >>>> The point is that you have to think upfront about a generic way to pass >>>> parameters. We didn't have to do it for encoders/decoders because we >>>> only catered to well-documented standard solutions only. By choosing to >>>> support PCM processing, a new can of worms is now open. >>>> >>>> I repeat: please do not make the mistake of listing all processing with >>>> an enum and a new structure for parameters every time someone needs a >>>> specific transform in their pipeline. 
We made that mistake with SOF and >>>> had to backtrack rather quickly. The only way to scale is an identifier >>>> that is NOT included in the kernel code but is known to higher and >>>> lower-levels only. >>> >>> There are two ways - black box (UUID - as you suggested) - or well >>> defined purpose (abstraction). For your example 'simple EQ', the >>> parameters should be the band (frequency range) volume values. It's >>> abstract and the real filters (resp. implementation) used behind may >>> depend on the hardware/driver capabilities. >> >> Indeed there is a possibility that the parameters are high-level, but >> that would require firmware or hardware to be able to generate actual >> coefficients from those parameters. That usually requires some advanced >> math which isn't necessarily obvious to implement with fixed-point hardware. >> >>> From my view, the really special cases may be handled as black box, but >>> others like (A)SRC should follow some well-defined abstraction IMHO to >>> not force user space to handle all special cases. >> >> I am not against the high-level abstractions, e.g. along the lines of >> what Android defined: >> https://developer.android.com/reference/android/media/audiofx/AudioEffect >> >> That's not sufficient however, we also need to make sure there's an >> ability to provide pre-computed coefficients in an opaque manner for >> processing that doesn't fit in the well-defined cases. In practice there >> are very few 3rd party IP that fits in well-defined cases, everyone has >> secret-sauce parameters and options. > > Appreciate the discussion. > > Let me explain the reason for the change: > > Why I use the metadata ioctl is because the ALSA controls are binding > to the sound card. What I want is the controls can be bound to > snd_compr_stream, because the ASRC compress sound card can > support multi instances ( the ASRC can support multi conversion in > parallel). The ALSA controls can't be used for this case, the only > choice in current compress API is metadata ioctl. And metadata > ioctl can be called many times which can meet the ratio modifier > requirement (ratio may be drift on the fly) This argument is not valid. The controls are bound to the card, but the element identifiers have already iface (interface), device and subdevice numbers. We are using controls for PCM devices for example. The binding is straight. Just add SNDRV_CTL_ELEM_IFACE_COMPRESS define and specify the compress device number in the 'struct snd_ctl_elem_id'. Jaroslav
On 09. 08. 24 14:52, Pierre-Louis Bossart wrote: >> And metadata >> ioctl can be called many times which can meet the ratio modifier >> requirement (ratio may be drift on the fly) > > Interesting, that's yet another way of handling the drift with userspace > modifying the ratio dynamically. That's different to what I've seen before. Note that the "timing" is managed by user space with this scheme. >> And compress API uses codec as the unit for capability query and >> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' >> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output >> format and output rate, channels definition just reuse the snd_codec.ch_in. > > The capability query is an interesting point as well, it's not clear how > to expose to userspace what this specific implementation can do, while > at the same time *requiring* userpace to update the ratio dynamically. > For something like this to work, userspace needs to have pre-existing > information on how the SRC works. Yes, it's about abstraction. User space wants to push data, read the data back converted to the target rate, and possibly modify the drift using a control, managing the clocks in its own way. We can assume that if this control does not exist, the drift cannot be controlled. Also, a nice thing is that the control has min and max values (a range), so the driver can specify the drift range, too. And again, look at the "PCM Rate Shift 100000" control implementation in sound/drivers/aloop.c. It would be nice to have the base offset for the shift/drift/pitch value standardized. Jaroslav
>>> And metadata >>> ioctl can be called many times which can meet the ratio modifier >>> requirement (ratio may be drift on the fly) >> >> Interesting, that's yet another way of handling the drift with userspace >> modifying the ratio dynamically. That's different to what I've seen >> before. > > Note that the "timing" is managed by the user space with this scheme. > >>> And compress API uses codec as the unit for capability query and >>> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' >>> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output >>> format and output rate, channels definition just reuse the >>> snd_codec.ch_in. >> >> The capability query is an interesting point as well, it's not clear how >> to expose to userspace what this specific implementation can do, while >> at the same time *requiring* userpace to update the ratio dynamically. >> For something like this to work, userspace needs to have pre-existing >> information on how the SRC works. > > Yes, it's about abstraction. The user space wants to push data, read > data back converted to the target rate and eventually modify the drift > using a control managing clocks using own way. We can eventually assume, > that if this control does not exist, the drift cannot be controlled. > Also, nice thing is that the control has min and max values (range), so > driver can specify the drift range, too. This mode of operation would be fine, but if you add the SRC as part of the processing allowed in a compressed stream, it might be used in a 'regular' real-time pipeline and the arguments and implementation could be very different. In other words, this SRC support looks like an extension of the compress API for a very narrow usage restricted to the memory-to-memory case only. I worry a bit about the next steps... Are there going to be other types of PCM processing like this, and if yes, how would we know if they are intended to be used for the 'regular' compress API or for the memory-to-memory scheme?
On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote: > > On 09. 08. 24 14:52, Pierre-Louis Bossart wrote: > > >> And metadata > >> ioctl can be called many times which can meet the ratio modifier > >> requirement (ratio may be drift on the fly) > > > > Interesting, that's yet another way of handling the drift with userspace > > modifying the ratio dynamically. That's different to what I've seen before. > > Note that the "timing" is managed by the user space with this scheme. > > >> And compress API uses codec as the unit for capability query and > >> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' > >> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output > >> format and output rate, channels definition just reuse the snd_codec.ch_in. > > > > The capability query is an interesting point as well, it's not clear how > > to expose to userspace what this specific implementation can do, while > > at the same time *requiring* userpace to update the ratio dynamically. > > For something like this to work, userspace needs to have pre-existing > > information on how the SRC works. > > Yes, it's about abstraction. The user space wants to push data, read data back > converted to the target rate and eventually modify the drift using a control > managing clocks using own way. We can eventually assume, that if this control > does not exist, the drift cannot be controlled. Also, nice thing is that the > control has min and max values (range), so driver can specify the drift range, > too. > > And again, look to "PCM Rate Shift 100000" control implementation in > sound/drivers/aloop.c. It would be nice to have the base offset for the > shift/drift/pitch value standardized.

Thanks. But the ASRC driver I implemented is different: I register only one sound card with one device/subdevice, while the ASRC hardware supports 4 instances running together, so the user can open the card device 4 times to create 4 instances. The controls would therefore need to be bound to the compress streams.

I think I can remove 'SNDRV_COMPRESS_SRC_RATIO_MOD' and only define a private metadata type for the driver, which means only the ASRC driver and its user application know the type.

For the changes in 'include/uapi/sound/compress_params.h', should I keep them, or is there any other suggestion for them?

Best regards Shengjiu Wang

> > Jaroslav > > -- > Jaroslav Kysela <perex@perex.cz> > Linux Sound Maintainer; ALSA Project; Red Hat, Inc. >
On 12. 08. 24 12:24, Shengjiu Wang wrote: > On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote: >> >> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote: >> >>>> And metadata >>>> ioctl can be called many times which can meet the ratio modifier >>>> requirement (ratio may be drift on the fly) >>> >>> Interesting, that's yet another way of handling the drift with userspace >>> modifying the ratio dynamically. That's different to what I've seen before. >> >> Note that the "timing" is managed by the user space with this scheme. >> >>>> And compress API uses codec as the unit for capability query and >>>> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' >>>> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output >>>> format and output rate, channels definition just reuse the snd_codec.ch_in. >>> >>> The capability query is an interesting point as well, it's not clear how >>> to expose to userspace what this specific implementation can do, while >>> at the same time *requiring* userpace to update the ratio dynamically. >>> For something like this to work, userspace needs to have pre-existing >>> information on how the SRC works. >> >> Yes, it's about abstraction. The user space wants to push data, read data back >> converted to the target rate and eventually modify the drift using a control >> managing clocks using own way. We can eventually assume, that if this control >> does not exist, the drift cannot be controlled. Also, nice thing is that the >> control has min and max values (range), so driver can specify the drift range, >> too. >> >> And again, look to "PCM Rate Shift 100000" control implementation in >> sound/drivers/aloop.c. It would be nice to have the base offset for the >> shift/drift/pitch value standardized. > > Thanks. > > But the ASRC driver I implemented is different, I just register one sound > card, one device/subdevice. but the ASRC hardware support 4 instances > together, so user can open the card device 4 times to create 4 instances > then the controls can only bind with compress streams. It's just a reason to add the subdevice code for the compress offload layer like we have in other APIs for overall consistency. I'll try to work on this. > I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD', Yes. > Only define a private type for driver, which means only the ASRC driver > and its user application know the type. The control API should be used for this IMHO. > For the change in 'include/uapi/sound/compress_params.h", should I > keep them, is there any other suggestion for them? I don't see a problem there. Jaroslav
On 8/12/24 15:31, Jaroslav Kysela wrote: > On 12. 08. 24 12:24, Shengjiu Wang wrote: >> On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote: >>> >>> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote: >>> >>>>> And metadata >>>>> ioctl can be called many times which can meet the ratio modifier >>>>> requirement (ratio may be drift on the fly) >>>> >>>> Interesting, that's yet another way of handling the drift with >>>> userspace >>>> modifying the ratio dynamically. That's different to what I've seen >>>> before. >>> >>> Note that the "timing" is managed by the user space with this scheme. >>> >>>>> And compress API uses codec as the unit for capability query and >>>>> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC' >>>>> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output >>>>> format and output rate, channels definition just reuse the >>>>> snd_codec.ch_in. >>>> >>>> The capability query is an interesting point as well, it's not clear >>>> how >>>> to expose to userspace what this specific implementation can do, while >>>> at the same time *requiring* userpace to update the ratio dynamically. >>>> For something like this to work, userspace needs to have pre-existing >>>> information on how the SRC works. >>> >>> Yes, it's about abstraction. The user space wants to push data, read >>> data back >>> converted to the target rate and eventually modify the drift using a >>> control >>> managing clocks using own way. We can eventually assume, that if this >>> control >>> does not exist, the drift cannot be controlled. Also, nice thing is >>> that the >>> control has min and max values (range), so driver can specify the >>> drift range, >>> too. >>> >>> And again, look to "PCM Rate Shift 100000" control implementation in >>> sound/drivers/aloop.c. It would be nice to have the base offset for the >>> shift/drift/pitch value standardized. >> >> Thanks. >> >> But the ASRC driver I implemented is different, I just register one sound >> card, one device/subdevice. but the ASRC hardware support 4 instances >> together, so user can open the card device 4 times to create 4 instances >> then the controls can only bind with compress streams. > > It's just a reason to add the subdevice code for the compress offload > layer like we have in other APIs for overall consistency. I'll try to > work on this. I thought this was supported already? I remember there was a request to enable more than one compressed stream for enhanced cross-fade support with different formats? That isn't supported with the single-device + PARTIAL_DRAIN method. Vinod? >> I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD', > > Yes. > >> Only define a private type for driver, which means only the ASRC driver >> and its user application know the type. > > The control API should be used for this IMHO. Agree, this would be a 'clean' split where the compress API is used for the data parts and the control parts used otherwise to alter the ratio or whatever else is needed. >> For the change in 'include/uapi/sound/compress_params.h", should I >> keep them, is there any other suggestion for them? You can add the SRC type but if you use a control for the parameters you don't need to add anything for the encoder options, do you?
On Mon, Aug 12, 2024 at 9:44 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
>
> On 8/12/24 15:31, Jaroslav Kysela wrote:
> > On 12. 08. 24 12:24, Shengjiu Wang wrote:
> >> On Fri, Aug 9, 2024 at 10:01 PM Jaroslav Kysela <perex@perex.cz> wrote:
> >>>
> >>> On 09. 08. 24 14:52, Pierre-Louis Bossart wrote:
> >>>
> >>>>> And metadata
> >>>>> ioctl can be called many times which can meet the ratio modifier
> >>>>> requirement (ratio may be drift on the fly)
> >>>>
> >>>> Interesting, that's yet another way of handling the drift with
> >>>> userspace
> >>>> modifying the ratio dynamically. That's different to what I've seen
> >>>> before.
> >>>
> >>> Note that the "timing" is managed by the user space with this scheme.
> >>>
> >>>>> And compress API uses codec as the unit for capability query and
> >>>>> parameter setting, So I think need to define "SND_AUDIOCODEC_SRC'
> >>>>> and 'struct snd_dec_src', for the 'snd_dec_src' just defined output
> >>>>> format and output rate, channels definition just reuse the
> >>>>> snd_codec.ch_in.
> >>>>
> >>>> The capability query is an interesting point as well, it's not clear
> >>>> how
> >>>> to expose to userspace what this specific implementation can do, while
> >>>> at the same time *requiring* userpace to update the ratio dynamically.
> >>>> For something like this to work, userspace needs to have pre-existing
> >>>> information on how the SRC works.
> >>>
> >>> Yes, it's about abstraction. The user space wants to push data, read
> >>> data back
> >>> converted to the target rate and eventually modify the drift using a
> >>> control
> >>> managing clocks using own way. We can eventually assume, that if this
> >>> control
> >>> does not exist, the drift cannot be controlled. Also, nice thing is
> >>> that the
> >>> control has min and max values (range), so driver can specify the
> >>> drift range,
> >>> too.
> >>>
> >>> And again, look to "PCM Rate Shift 100000" control implementation in
> >>> sound/drivers/aloop.c. It would be nice to have the base offset for the
> >>> shift/drift/pitch value standardized.
> >>
> >> Thanks.
> >>
> >> But the ASRC driver I implemented is different, I just register one sound
> >> card, one device/subdevice. but the ASRC hardware support 4 instances
> >> together, so user can open the card device 4 times to create 4 instances
> >> then the controls can only bind with compress streams.
> >
> > It's just a reason to add the subdevice code for the compress offload
> > layer like we have in other APIs for overall consistency. I'll try to
> > work on this.
>
> I thought this was supported already? I remember there was a request to
> enable more than one compressed stream for enhanced cross-fade support
> with different formats? That isn't supported with the single-device +
> PARTIAL_DRAIN method.
>
> Vinod?
>
> >> I think I can remove the 'SNDRV_COMPRESS_SRC_RATIO_MOD',
> >
> > Yes.
> >
> >> Only define a private type for driver, which means only the ASRC driver
> >> and its user application know the type.
> >
> > The control API should be used for this IMHO.
>
> Agree, this would be a 'clean' split where the compress API is used for
> the data parts and the control parts used otherwise to alter the ratio
> or whatever else is needed.
>
> >> For the change in 'include/uapi/sound/compress_params.h", should I
> >> keep them, is there any other suggestion for them?
>
> You can add the SRC type but if you use a control for the parameters you
> don't need to add anything for the encoder options, do you?

Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
the SRC type will be dropped.

But my understanding of the control means the .set_metadata() API, right?
As I said, the output rate, output format, and ratio modifier are applied to
the instances of ASRC, which is the snd_compr_stream in driver.
so only the .set_metadata() API can be used for these purposes.

Best regards
Shengjiu Wang
> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
> the SRC type will be dropped.

sounds good.

> But my understanding of the control means the .set_metadata() API, right?
> As I said, the output rate, output format, and ratio modifier are applied to
> the instances of ASRC, which is the snd_compr_stream in driver.
> so only the .set_metadata() API can be used for these purposes.

Humm, this is more controversial.

The term 'metadata' really referred to known information present in
headers or additional ID3 tags and not in the compressed file itself.
The .set_metadata was assumed to be called ONCE before decoding.

But here you have a need to update the ratio modifier on a regular basis
to compensate for the drift. This isn't what this specific callback was
designed for. We could change and allow this callback to be used
multiple times, but then this could create problems for existing
implementations which cannot deal with modified metadata on the fly.

And then there's the problem of defining a 'key' for the metadata. the
definition of the key is a u32, so there's plenty of space for different
implementations, but a collision is possible. We'd need an agreement on
how to allocate keys to different solutions without changing the header
file for every implementation.

It sounds like we'd need a 'runtime params' callback - unless there's a
better trick to tie the control and compress layers?
On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
<pierre-louis.bossart@linux.intel.com> wrote:
>
>
> > Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
> > the SRC type will be dropped.
>
> sounds good.
>
> > But my understanding of the control means the .set_metadata() API, right?
> > As I said, the output rate, output format, and ratio modifier are applied to
> > the instances of ASRC, which is the snd_compr_stream in driver.
> > so only the .set_metadata() API can be used for these purposes.
>
> Humm, this is more controversial.
>
> The term 'metadata' really referred to known information present in
> headers or additional ID3 tags and not in the compressed file itself.
> The .set_metadata was assumed to be called ONCE before decoding.
>
> But here you have a need to update the ratio modifier on a regular basis
> to compensate for the drift. This isn't what this specific callback was
> designed for. We could change and allow this callback to be used
> multiple times, but then this could create problems for existing
> implementations which cannot deal with modified metadata on the fly.

.set_metadata can be called multi times now, no need to change currently.

>
> And then there's the problem of defining a 'key' for the metadata. the
> definition of the key is a u32, so there's plenty of space for different
> implementations, but a collision is possible. We'd need an agreement on
> how to allocate keys to different solutions without changing the header
> file for every implementation.

Can we define a private space for each case? For example the key larger
than 0x80000000 is private, each driver can define it by themself?

>
> It sounds like we'd need a 'runtime params' callback - unless there's a
> better trick to tie the control and compress layers?
>
On 8/14/24 13:12, Shengjiu Wang wrote:
> On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
> <pierre-louis.bossart@linux.intel.com> wrote:
>>
>>
>>> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
>>> the SRC type will be dropped.
>>
>> sounds good.
>>
>>> But my understanding of the control means the .set_metadata() API, right?
>>> As I said, the output rate, output format, and ratio modifier are applied to
>>> the instances of ASRC, which is the snd_compr_stream in driver.
>>> so only the .set_metadata() API can be used for these purposes.
>>
>> Humm, this is more controversial.
>>
>> The term 'metadata' really referred to known information present in
>> headers or additional ID3 tags and not in the compressed file itself.
>> The .set_metadata was assumed to be called ONCE before decoding.
>>
>> But here you have a need to update the ratio modifier on a regular basis
>> to compensate for the drift. This isn't what this specific callback was
>> designed for. We could change and allow this callback to be used
>> multiple times, but then this could create problems for existing
>> implementations which cannot deal with modified metadata on the fly.
>
> .set_metadata can be called multi times now, no need to change currently.

Not really, this set_metadata() callback is used only for gapless
transitions between tracks, see fcplay.c in tinycompress.

And now I am really confused because tinycompress uses an IOCTL directly:

	metadata.key = SNDRV_COMPRESS_ENCODER_PADDING;
	metadata.value[0] = mdata->encoder_padding;
	if (ioctl(compress->fd, SNDRV_COMPRESS_SET_METADATA, &metadata))

Whereas you want to use the ops callback directly from the control layer?

What would prevent a userspace program from using the ioctl directly
then? In that case, why do we need the control? I must be missing something.

>> And then there's the problem of defining a 'key' for the metadata. the
>> definition of the key is a u32, so there's plenty of space for different
>> implementations, but a collision is possible. We'd need an agreement on
>> how to allocate keys to different solutions without changing the header
>> file for every implementation.
>
> Can we define a private space for each case? For example the key larger
> than 0x80000000 is private, each driver can define it by themself?

that would be a possibility indeed - provided that the opens above are
straightened out.

>> It sounds like we'd need a 'runtime params' callback - unless there's a
>> better trick to tie the control and compress layers?
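For reference, a short userspace sketch of the metadata-based approach being debated above, using the existing SNDRV_COMPRESS_SET_METADATA ioctl and struct snd_compr_metadata from compress_offload.h. The MY_ASRC_RATIO_MOD key and the ppm-style scaling are made up for illustration; no such key exists in the UAPI headers today, which is exactly the private-key-space question raised in the thread.

/* Hypothetical on-the-fly ratio update via the metadata ioctl. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <sound/compress_offload.h>

#define MY_ASRC_RATIO_MOD	0x80000001u	/* hypothetical vendor-private key */

static int set_ratio_mod(int compr_fd, uint32_t ratio_mod)
{
	struct snd_compr_metadata metadata = {
		.key = MY_ASRC_RATIO_MOD,
		.value = { ratio_mod },	/* scaling would be driver-defined */
	};

	/* could be issued repeatedly while the stream runs to follow the drift */
	return ioctl(compr_fd, SNDRV_COMPRESS_SET_METADATA, &metadata);
}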
On 14. 08. 24 13:58, Pierre-Louis Bossart wrote:
>
>
> On 8/14/24 13:12, Shengjiu Wang wrote:
>> On Wed, Aug 14, 2024 at 5:40 PM Pierre-Louis Bossart
>> <pierre-louis.bossart@linux.intel.com> wrote:
>>>
>>>
>>>> Yes, to go further, I think we can use SND_AUDIOCODEC_PCM, then
>>>> the SRC type will be dropped.
>>>
>>> sounds good.
>>>
>>>> But my understanding of the control means the .set_metadata() API, right?
>>>> As I said, the output rate, output format, and ratio modifier are applied to
>>>> the instances of ASRC, which is the snd_compr_stream in driver.
>>>> so only the .set_metadata() API can be used for these purposes.
>>>
>>> Humm, this is more controversial.
>>>
>>> The term 'metadata' really referred to known information present in
>>> headers or additional ID3 tags and not in the compressed file itself.
>>> The .set_metadata was assumed to be called ONCE before decoding.
>>>
>>> But here you have a need to update the ratio modifier on a regular basis
>>> to compensate for the drift. This isn't what this specific callback was
>>> designed for. We could change and allow this callback to be used
>>> multiple times, but then this could create problems for existing
>>> implementations which cannot deal with modified metadata on the fly.
>>
>> .set_metadata can be called multi times now, no need to change currently.
>
> Not really, this set_metadata() callback is used only for gapless
> transitions between tracks, see fcplay.c in tinycompress.
>
> And now I am really confused because tinycompress uses an IOCTL directly:
>
> 	metadata.key = SNDRV_COMPRESS_ENCODER_PADDING;
> 	metadata.value[0] = mdata->encoder_padding;
> 	if (ioctl(compress->fd, SNDRV_COMPRESS_SET_METADATA, &metadata))
>
> Whereas you want to use the ops callback directly from the control layer?
>
> What would prevent a userspace program from using the ioctl directly
> then? In that case, why do we need the control? I must be missing something.

The whole discussion is about which place is more appropriate for the runtime
controls (like the frequency shift). My opinion is, if we have a layer for
this which can advertise the presence of those controls and even their
range / type / notifications, we should use it. The new/updated ioctls,
bound only to the active file descriptor, do not allow monitoring of those
values from outside.

>>> And then there's the problem of defining a 'key' for the metadata. the
>>> definition of the key is a u32, so there's plenty of space for different
>>> implementations, but a collision is possible. We'd need an agreement on
>>> how to allocate keys to different solutions without changing the header
>>> file for every implementation.
>>
>> Can we define a private space for each case? For example the key larger
>> than 0x80000000 is private, each driver can define it by themself?
>
> that would be a possibility indeed - provided that the opens above are
> straightened out.
>
>>> It sounds like we'd need a 'runtime params' callback - unless there's a
>>> better trick to tie the control and compress layers?

I don't follow. If the compress driver code uses card/device/subdevice
numbers, we can address the control properly. The problem is just that
subdevice support is missing in the current compress code / API.

For me, the compress_params.h changes may also require paying attention to
the encoding/decoding of the current compressed streams.
So something like this may be more appropriate for the first step:

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index ddc77322d571..c664d15410eb 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -347,6 +347,8 @@ union snd_codec_options {
  * @modes: Supported modes. See SND_AUDIOMODE defines
  * @formats: Supported formats. See SND_AUDIOSTREAMFORMAT defines
  * @min_buffer: Minimum buffer size handled by codec implementation
+ * @pcm_formats: Output (for decoders) or input (for encoders)
+ *               PCM formats (required to accel mode, 0 for other modes)
  * @reserved: reserved for future use
  *
  * This structure provides a scalar value for profiles, modes and stream
@@ -370,7 +372,8 @@ struct snd_codec_desc {
 	__u32 modes;
 	__u32 formats;
 	__u32 min_buffer;
-	__u32 reserved[15];
+	__u32 pcm_formats;
+	__u32 reserved[14];
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec
@@ -395,6 +398,8 @@ struct snd_codec_desc {
  * @align: Block alignment in bytes of an audio sample.
  *         Only required for PCM or IEC formats.
  * @options: encoder-specific settings
+ * @pcm_format: Output (for decoders) or input (for encoders)
+ *              PCM formats (required to accel mode, 0 for other modes)
  * @reserved: reserved for future use
  */
 
@@ -411,7 +416,8 @@ struct snd_codec {
 	__u32 format;
 	__u32 align;
 	union snd_codec_options options;
-	__u32 reserved[3];
+	__u32 pcm_format;
+	__u32 reserved[2];
 } __attribute__((packed, aligned(4)));
 
 #endif

Then the SRC extension may be like:

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index c664d15410eb..5d51ecba6d55 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -334,6 +334,14 @@ union snd_codec_options {
 	struct snd_dec_wma wma_d;
 	struct snd_dec_alac alac_d;
 	struct snd_dec_ape ape_d;
+	struct {
+		__u32 out_sample_rate;
+	} src_d;
+} __attribute__((packed, aligned(4)));
+
+struct snd_codec_desc_src {
+	__u32 out_sample_rate_min;
+	__u32 out_sample_rate_max;
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec_desc - description of codec capabilities
@@ -349,6 +357,7 @@ union snd_codec_options {
  * @min_buffer: Minimum buffer size handled by codec implementation
  * @pcm_formats: Output (for decoders) or input (for encoders)
  *               PCM formats (required to accel mode, 0 for other modes)
+ * @u_space: union space (for codec dependent date)
  * @reserved: reserved for future use
  *
  * This structure provides a scalar value for profiles, modes and stream
@@ -373,7 +382,11 @@ struct snd_codec_desc {
 	__u32 formats;
 	__u32 min_buffer;
 	__u32 pcm_formats;
-	__u32 reserved[14];
+	union {
+		__u32 u_space[6];
+		struct snd_codec_desc_src src;
+	} __attribute__((packed, aligned(4)));
+	__u32 reserved[8];
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec

This will allow to handshake the output rate between user space and kernel
driver. Eventually we can use a rate bitmap to be more precise in
"struct snd_codec_desc_src" (or combination of range/bitmap).

				Jaroslav
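To make the proposal above concrete, here is a sketch of how a userspace caller might fill struct snd_codec if that layout were adopted. The pcm_format member and the src_d branch of the options union exist only in the diff above, not in the current mainline header, so this is purely illustrative; the rates and formats are arbitrary example values.

/* Hypothetical usage of the proposed pcm_format/src_d fields. */
#include <sound/asound.h>
#include <sound/compress_params.h>

static void fill_src_params(struct snd_codec *codec)
{
	codec->id = SND_AUDIOCODEC_PCM;			/* raw PCM in, as agreed earlier in the thread */
	codec->ch_in = 2;
	codec->ch_out = 2;
	codec->sample_rate = 44100;			/* input rate in Hz */
	codec->format = SNDRV_PCM_FORMAT_S16_LE;	/* input sample format */
	codec->pcm_format = SNDRV_PCM_FORMAT_S32_LE;	/* proposed output-format field */
	codec->options.src_d.out_sample_rate = 48000;	/* proposed output-rate field */
}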
diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 98772b0cbcb7..8b2b72f94e26 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -112,10 +112,12 @@ struct snd_compr_codec_caps {
  *			      end of the track
  * @SNDRV_COMPRESS_ENCODER_DELAY: no of samples inserted by the encoder at the
  *			      beginning of the track
+ * @SNDRV_COMPRESS_SRC_RATIO_MOD: Resampling Ratio Modifier for sample rate converter
  */
 enum sndrv_compress_encoder {
 	SNDRV_COMPRESS_ENCODER_PADDING = 1,
 	SNDRV_COMPRESS_ENCODER_DELAY = 2,
+	SNDRV_COMPRESS_SRC_RATIO_MOD = 3,
 };
 
 /**
diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index ddc77322d571..0843773ea6b4 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -43,7 +43,8 @@
 #define SND_AUDIOCODEC_BESPOKE               ((__u32) 0x0000000E)
 #define SND_AUDIOCODEC_ALAC                  ((__u32) 0x0000000F)
 #define SND_AUDIOCODEC_APE                   ((__u32) 0x00000010)
-#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_APE
+#define SND_AUDIOCODEC_SRC                   ((__u32) 0x00000011)
+#define SND_AUDIOCODEC_MAX                   SND_AUDIOCODEC_SRC
 
 /*
  * Profile and modes are listed with bit masks. This allows for a
@@ -324,6 +325,11 @@ struct snd_dec_ape {
 	__u32 seek_table_present;
 } __attribute__((packed, aligned(4)));
 
+struct snd_dec_src {
+	__u32 format_out;
+	__u32 rate_out;
+} __attribute__((packed, aligned(4)));
+
 union snd_codec_options {
 	struct snd_enc_wma wma;
 	struct snd_enc_vorbis vorbis;
@@ -334,6 +340,7 @@ union snd_codec_options {
 	struct snd_dec_wma wma_d;
 	struct snd_dec_alac alac_d;
 	struct snd_dec_ape ape_d;
+	struct snd_dec_src src;
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec_desc - description of codec capabilities
Add Sample Rate Converter(SRC) codec support, define the output
format and rate for SRC.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 include/uapi/sound/compress_offload.h | 2 ++
 include/uapi/sound/compress_params.h  | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)