diff mbox series

[v4,02/13] ASoC: Intel: catpt: Define DSP operations

Message ID 20200812205753.29115-3-cezary.rojewski@intel.com (mailing list archive)
State New, archived
Headers show
Series ASoC: Intel: Catpt - Lynx and Wildcat point | expand

Commit Message

Cezary Rojewski Aug. 12, 2020, 8:57 p.m. UTC
Implement dsp lifecycle functions such as core RESET and STALL,
SRAM power control and LP clock selection. This also adds functions for
handling transport over DW DMA controller.

Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
---
 sound/soc/intel/catpt/dsp.c | 497 ++++++++++++++++++++++++++++++++++++
 1 file changed, 497 insertions(+)
 create mode 100644 sound/soc/intel/catpt/dsp.c

Comments

Andy Shevchenko Aug. 13, 2020, 6:51 p.m. UTC | #1
On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
> Implement dsp lifecycle functions such as core RESET and STALL,
> SRAM power control and LP clock selection. This also adds functions for
> handling transport over DW DMA controller.

...

> +static void catpt_dma_transfer_complete(void *arg)
> +{
> +	struct catpt_dev *cdev = arg;
> +

> +	dev_dbg(cdev->dev, "%s\n", __func__);

Noise. Somebody should provide either trace events, of get used to function tracer / perf.

> +}

...

> +static bool catpt_dma_filter(struct dma_chan *chan, void *param)
> +{

> +	return chan->device->dev == (struct device *)param;

Redundant casting, and please, consider to swap left and right side (in this
case easily to find these and perhaps provide a generic helper function).

> +}

...

> +#define CATPT_DMA_DEVID		1 /* dma engine used */

Not sure I understand what exactly this means.

...

> +#define CATPT_DMA_MAXBURST	0x3

We have DMA engine definitions for that, please avoid magic numbers.

...

> +#define CATPT_DMA_DSP_ADDR_MASK	0xFFF00000

GENMASK()

...

> +	dma_cap_set(DMA_SLAVE, mask);

How this helps with mem2mem channel?

...

> +	chan = dma_request_channel(mask, catpt_dma_filter, cdev->dev);
> +	if (!chan) {
> +		dev_err(cdev->dev, "request channel failed\n");

> +		dump_stack();

Huh?!

> +		return ERR_PTR(-EPROBE_DEFER);
> +	}

...

> +	status = dma_wait_for_async_tx(desc);

> +	catpt_updatel_shim(cdev, HMDC,
> +			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);

Update even in erroneous case?

> +	return (status == DMA_COMPLETE) ? 0 : -EPROTO;

...

> +static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram,
> +				  unsigned long mask, unsigned long new)
> +{
> +	unsigned long old;
> +	u32 off = sram->start;
> +	u32 b = __ffs(mask);
> +
> +	old = catpt_readl_pci(cdev, VDRTCTL0) & mask;

> +	dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx",
> +		mask, old, new);

trace event / point?

> +	if (old == new)
> +		return;
> +
> +	catpt_updatel_pci(cdev, VDRTCTL0, mask, new);

> +	udelay(60);

Delays should be commented.

> +
> +	/*
> +	 * dummy read as the very first access after block enable
> +	 * to prevent byte loss in future operations
> +	 */
> +	for_each_clear_bit_from(b, &new, fls(mask)) {

fls_long()

> +		u8 buf[4];
> +
> +		/* newly enabled: new bit=0 while old bit=1 */
> +		if (test_bit(b, &old)) {
> +			dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n",

> +				(b - __ffs(mask)), off);

Unneeded parentheses.

> +			memcpy_fromio(buf, cdev->lpe_ba + off, sizeof(buf));
> +		}
> +		off += CATPT_MEMBLOCK_SIZE;
> +	}
> +}

...

> +	/* offset value given mask's start and invert it as ON=b0 */

> +	new <<= __ffs(mask);
> +	new = ~(new) & mask;

Unneeded parentheses.
And perhaps in one line it will be better to understand:

	new = ~(new << __ffs(mask)) & mask;

...

> +	if (waiti) {
> +		/* wait for DSP to signal WAIT state */
> +		ret = catpt_readl_poll_shim(cdev, ISD,
> +					    reg, (reg & CATPT_ISD_DCPWM),
> +					    500, 10000);
> +		if (ret < 0) {

> +			dev_warn(cdev->dev, "await WAITI timeout\n");

Shouldn't be an error level?

> +			mutex_unlock(&cdev->clk_mutex);
> +			return ret;
> +		}
> +	}

...

> +int catpt_dsp_update_lpclock(struct catpt_dev *cdev)
> +{
> +	struct catpt_stream_runtime *stream;

> +	bool lp;
> +
> +	if (list_empty(&cdev->stream_list))
> +		return catpt_dsp_select_lpclock(cdev, true, true);
> +
> +	lp = true;
> +	list_for_each_entry(stream, &cdev->stream_list, node) {
> +		if (stream->prepared) {
> +			lp = false;
> +			break;
> +		}
> +	}
> +
> +	return catpt_dsp_select_lpclock(cdev, lp, true);

Seems too much duplication.

	struct catpt_stream_runtime *stream;

	list_for_each_entry(stream, &cdev->stream_list, node) {
		if (stream->prepared)
			return catpt_dsp_select_lpclock(cdev, false, true);
	}

	return catpt_dsp_select_lpclock(cdev, true, true);


Better?

> +}

...

> +	/* set D3 */
> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
> +	udelay(50);

Don't we have PCI core function for this?

...

> +	/* set D0 */
> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
> +	udelay(100);

Ditto.

...

> +	/* set D3 */
> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
> +	udelay(50);

Ditto.

...

> +	/* set D0 */
> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);

Ditto.
Cezary Rojewski Aug. 17, 2020, 11:12 a.m. UTC | #2
On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
>> Implement dsp lifecycle functions such as core RESET and STALL,
>> SRAM power control and LP clock selection. This also adds functions for
>> handling transport over DW DMA controller.

Thanks for your input Andy!

>> +static void catpt_dma_transfer_complete(void *arg)
>> +{
>> +	struct catpt_dev *cdev = arg;
>> +
> 
>> +	dev_dbg(cdev->dev, "%s\n", __func__);
> 
> Noise. Somebody should provide either trace events, of get used to function tracer / perf.
> 

Agree. Don't have much expertise with DMA-engine and some stuff is based 
on existing stuff:
/sound/soc/intel/common/sst-firmware.c ::sst_dma_transfer_complete()

That's not somebody - that's CI. I don't mind ftracer, but our's CI 
logging is currently dmesg/ event traces -based.

Anyway, with log removed, catpt_dma_transfer_complete() ceases to exist too.

> 
>> +static bool catpt_dma_filter(struct dma_chan *chan, void *param)
>> +{
> 
>> +	return chan->device->dev == (struct device *)param;
> 
> Redundant casting, and please, consider to swap left and right side (in this
> case easily to find these and perhaps provide a generic helper function).
> 

Good point. Yeah, I've tried to find generic-helper before even adding 
this, but with no result.

> 
>> +#define CATPT_DMA_DEVID		1 /* dma engine used */
> 
> Not sure I understand what exactly this means.
> 

Well, you may choose either engine 0 or 1 for loading images. Reference 
solution which I'm basing catpt on - Windows driver equivalent - makes 
use of engine 1. Goal of this implementation is to align closely to 
stable Windows solution wherever possible to reduce maintainance cost.

> 
>> +#define CATPT_DMA_MAXBURST	0x3
> 
> We have DMA engine definitions for that, please avoid magic numbers.
> 

As with most of the dma stuff, based on existing:
/sound/soc/intel/common/sst-firmware.c SST_DSP_DMA_MAX_BURST

Ack.

>> +#define CATPT_DMA_DSP_ADDR_MASK	0xFFF00000
> 
> GENMASK()
> 

Ditto as above. Ack.

>> +	dma_cap_set(DMA_SLAVE, mask);
> 
> How this helps with mem2mem channel?
> 
> ...
> 
>> +	chan = dma_request_channel(mask, catpt_dma_filter, cdev->dev);
>> +	if (!chan) {
>> +		dev_err(cdev->dev, "request channel failed\n");
> 
>> +		dump_stack();
> 
> Huh?!
> 

I'm as surpriced as you : )
Remnant of pm_runtime + single platform_device (for adsp and dw both) 
debug. Removed in v5.

>> +		return ERR_PTR(-EPROBE_DEFER);
>> +	}
> 
> ...
> 
>> +	status = dma_wait_for_async_tx(desc);
> 
>> +	catpt_updatel_shim(cdev, HMDC,
>> +			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);
> 
> Update even in erroneous case?
> 

Yes. This is based on stable Windows solution equivalent and get's 
updated even in failure case to disable access to HOST memory in demand 
more.

>> +	return (status == DMA_COMPLETE) ? 0 : -EPROTO;
> 
> ...
> 
>> +static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram,
>> +				  unsigned long mask, unsigned long new)
>> +{
>> +	unsigned long old;
>> +	u32 off = sram->start;
>> +	u32 b = __ffs(mask);
>> +
>> +	old = catpt_readl_pci(cdev, VDRTCTL0) & mask;
> 
>> +	dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx",
>> +		mask, old, new);
> 
> trace event / point?
> 

Well, I've just replaced these with dev_dbg (from trace events). Not 
many usages and caused all reg-related prints to be available in dmesg.

>> +	if (old == new)
>> +		return;
>> +
>> +	catpt_updatel_pci(cdev, VDRTCTL0, mask, new);
> 
>> +	udelay(60);
> 
> Delays should be commented.
> 

Similarly to previous review (comment regarding hw support), I'm afraid 
I don't have good comments for most occurences available.

I'll try to add something meaningful for at least some of these.

>> +
>> +	/*
>> +	 * dummy read as the very first access after block enable
>> +	 * to prevent byte loss in future operations
>> +	 */
>> +	for_each_clear_bit_from(b, &new, fls(mask)) {
> 
> fls_long()
> 

Ack.

>> +		u8 buf[4];
>> +
>> +		/* newly enabled: new bit=0 while old bit=1 */
>> +		if (test_bit(b, &old)) {
>> +			dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n",
> 
>> +				(b - __ffs(mask)), off);
> 
> Unneeded parentheses.
> 

Ack.

>> +			memcpy_fromio(buf, cdev->lpe_ba + off, sizeof(buf));
>> +		}
>> +		off += CATPT_MEMBLOCK_SIZE;
>> +	}
>> +}
> 
> ...
> 
>> +	/* offset value given mask's start and invert it as ON=b0 */
> 
>> +	new <<= __ffs(mask);
>> +	new = ~(new) & mask;
> 
> Unneeded parentheses.
> And perhaps in one line it will be better to understand:
> 
> 	new = ~(new << __ffs(mask)) & mask;
> 

Was called out in the past not to combine everything in one-line like if 
I'm to hide something from reviewer.

No problem with combining these together in v5.

>> +	if (waiti) {
>> +		/* wait for DSP to signal WAIT state */
>> +		ret = catpt_readl_poll_shim(cdev, ISD,
>> +					    reg, (reg & CATPT_ISD_DCPWM),
>> +					    500, 10000);
>> +		if (ret < 0) {
> 
>> +			dev_warn(cdev->dev, "await WAITI timeout\n");
> 
> Shouldn't be an error level?
> 

As this causes early return, I agree.

>> +			mutex_unlock(&cdev->clk_mutex);
>> +			return ret;
>> +		}
>> +	}
> 
> ...
> 
>> +int catpt_dsp_update_lpclock(struct catpt_dev *cdev)
>> +{
>> +	struct catpt_stream_runtime *stream;
> 
>> +	bool lp;
>> +
>> +	if (list_empty(&cdev->stream_list))
>> +		return catpt_dsp_select_lpclock(cdev, true, true);
>> +
>> +	lp = true;
>> +	list_for_each_entry(stream, &cdev->stream_list, node) {
>> +		if (stream->prepared) {
>> +			lp = false;
>> +			break;
>> +		}
>> +	}
>> +
>> +	return catpt_dsp_select_lpclock(cdev, lp, true);
> 
> Seems too much duplication.
> 
> 	struct catpt_stream_runtime *stream;
> 
> 	list_for_each_entry(stream, &cdev->stream_list, node) {
> 		if (stream->prepared)
> 			return catpt_dsp_select_lpclock(cdev, false, true);
> 	}
> 
> 	return catpt_dsp_select_lpclock(cdev, true, true);
> 
> 
> Better?
> 

list_first_entry (part of list_for_each_entry) expects list to be 
non-empty. ->streal_list may be empty when invoking 
catpt_dsp_update_lpclock().

>> +	/* set D3 */
>> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
>> +	udelay(50);
> 
> Don't we have PCI core function for this?
> 
> ...
> 
>> +	/* set D0 */
>> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
>> +	udelay(100);
> 
> Ditto.
> 
> ...
> 
>> +	/* set D3 */
>> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
>> +	udelay(50);
> 
> Ditto.
> 
> ...
> 
>> +	/* set D0 */
>> +	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
> 
> Ditto.
> 

Thanks to you now I know the correct answer: yes.
Ack for all of these. Good advice Andy, again!
Andy Shevchenko Aug. 18, 2020, 11:50 a.m. UTC | #3
On Mon, Aug 17, 2020 at 01:12:01PM +0200, Cezary Rojewski wrote:
> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
> > > Implement dsp lifecycle functions such as core RESET and STALL,
> > > SRAM power control and LP clock selection. This also adds functions for
> > > handling transport over DW DMA controller.
> 
> Thanks for your input Andy!

You're welcome!

...

> > > +#define CATPT_DMA_DEVID		1 /* dma engine used */
> > 
> > Not sure I understand what exactly this means.
> > 
> 
> Well, you may choose either engine 0 or 1 for loading images. Reference
> solution which I'm basing catpt on - Windows driver equivalent - makes use
> of engine 1. Goal of this implementation is to align closely to stable
> Windows solution wherever possible to reduce maintainance cost.

Please, give extended comment here.

...

> > > +	status = dma_wait_for_async_tx(desc);
> > 
> > > +	catpt_updatel_shim(cdev, HMDC,
> > > +			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);
> > 
> > Update even in erroneous case?
> > 
> 
> Yes. This is based on stable Windows solution equivalent and get's updated
> even in failure case to disable access to HOST memory in demand more.

I guess this deserves a comment.

> > > +	return (status == DMA_COMPLETE) ? 0 : -EPROTO;

...

> > > +	new <<= __ffs(mask);
> > > +	new = ~(new) & mask;
> > 
> > Unneeded parentheses.
> > And perhaps in one line it will be better to understand:
> > 
> > 	new = ~(new << __ffs(mask)) & mask;
> > 
> 
> Was called out in the past not to combine everything in one-line like if I'm
> to hide something from reviewer.
> 
> No problem with combining these together in v5.

you also may consider to use u32_replace_bits() or so.

...

> > > +	bool lp;
> > > +
> > > +	if (list_empty(&cdev->stream_list))
> > > +		return catpt_dsp_select_lpclock(cdev, true, true);
> > > +
> > > +	lp = true;
> > > +	list_for_each_entry(stream, &cdev->stream_list, node) {
> > > +		if (stream->prepared) {
> > > +			lp = false;
> > > +			break;
> > > +		}
> > > +	}
> > > +
> > > +	return catpt_dsp_select_lpclock(cdev, lp, true);
> > 
> > Seems too much duplication.
> > 
> > 	struct catpt_stream_runtime *stream;
> > 
> > 	list_for_each_entry(stream, &cdev->stream_list, node) {
> > 		if (stream->prepared)
> > 			return catpt_dsp_select_lpclock(cdev, false, true);
> > 	}
> > 
> > 	return catpt_dsp_select_lpclock(cdev, true, true);
> > 
> > 
> > Better?
> 
> list_first_entry (part of list_for_each_entry) expects list to be non-empty.
> ->streal_list may be empty when invoking catpt_dsp_update_lpclock().

I didn't get this. Can you point out where is exactly problematic place?
Cezary Rojewski Aug. 19, 2020, 1:46 p.m. UTC | #4
On 2020-08-18 1:50 PM, Andy Shevchenko wrote:
> On Mon, Aug 17, 2020 at 01:12:01PM +0200, Cezary Rojewski wrote:
>> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
>>> On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
>>>> Implement dsp lifecycle functions such as core RESET and STALL,
>>>> SRAM power control and LP clock selection. This also adds functions for
>>>> handling transport over DW DMA controller.
>>
>> Thanks for your input Andy!
> 
> You're welcome!
> 

>>>> +#define CATPT_DMA_DEVID		1 /* dma engine used */
>>>
>>> Not sure I understand what exactly this means.
>>>
>>
>> Well, you may choose either engine 0 or 1 for loading images. Reference
>> solution which I'm basing catpt on - Windows driver equivalent - makes use
>> of engine 1. Goal of this implementation is to align closely to stable
>> Windows solution wherever possible to reduce maintainance cost.
> 
> Please, give extended comment here.
> 

Sure, ack.

>>>> +	status = dma_wait_for_async_tx(desc);
>>>
>>>> +	catpt_updatel_shim(cdev, HMDC,
>>>> +			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);
>>>
>>> Update even in erroneous case?
>>>
>>
>> Yes. This is based on stable Windows solution equivalent and get's updated
>> even in failure case to disable access to HOST memory in demand more.
> 
> I guess this deserves a comment.
> 

Ditto.

>>>> +	return (status == DMA_COMPLETE) ? 0 : -EPROTO;
> 
> ...
> 
>>>> +	new <<= __ffs(mask);
>>>> +	new = ~(new) & mask;
>>>
>>> Unneeded parentheses.
>>> And perhaps in one line it will be better to understand:
>>>
>>> 	new = ~(new << __ffs(mask)) & mask;
>>>
>>
>> Was called out in the past not to combine everything in one-line like if I'm
>> to hide something from reviewer.
>>
>> No problem with combining these together in v5.
> 
> you also may consider to use u32_replace_bits() or so.
> 

I'll check bitfields.h too, sure.

>>>> +	bool lp;
>>>> +
>>>> +	if (list_empty(&cdev->stream_list))
>>>> +		return catpt_dsp_select_lpclock(cdev, true, true);
>>>> +
>>>> +	lp = true;
>>>> +	list_for_each_entry(stream, &cdev->stream_list, node) {
>>>> +		if (stream->prepared) {
>>>> +			lp = false;
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	return catpt_dsp_select_lpclock(cdev, lp, true);
>>>
>>> Seems too much duplication.
>>>
>>> 	struct catpt_stream_runtime *stream;
>>>
>>> 	list_for_each_entry(stream, &cdev->stream_list, node) {
>>> 		if (stream->prepared)
>>> 			return catpt_dsp_select_lpclock(cdev, false, true);
>>> 	}
>>>
>>> 	return catpt_dsp_select_lpclock(cdev, true, true);
>>>
>>>
>>> Better?
>>
>> list_first_entry (part of list_for_each_entry) expects list to be non-empty.
>> ->streal_list may be empty when invoking catpt_dsp_update_lpclock().
> 
> I didn't get this. Can you point out where is exactly problematic place?
> 

list_for_each_entry makes use of list_first_entry when initializing 
'pos' index variable. Documentation for list_first_entry reads: "Note, 
that list is expected to be not empty" so I'm validating list's status 
before moving on to the loop as stream_list may be empty when 
catpt_dsp_update_lpclock() gets called.
Andy Shevchenko Aug. 19, 2020, 2:21 p.m. UTC | #5
On Wed, Aug 19, 2020 at 03:46:30PM +0200, Cezary Rojewski wrote:
> On 2020-08-18 1:50 PM, Andy Shevchenko wrote:
> > On Mon, Aug 17, 2020 at 01:12:01PM +0200, Cezary Rojewski wrote:
> > > On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> > > > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:

...

> > > > > +	bool lp;
> > > > > +
> > > > > +	if (list_empty(&cdev->stream_list))
> > > > > +		return catpt_dsp_select_lpclock(cdev, true, true);
> > > > > +
> > > > > +	lp = true;
> > > > > +	list_for_each_entry(stream, &cdev->stream_list, node) {
> > > > > +		if (stream->prepared) {
> > > > > +			lp = false;
> > > > > +			break;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	return catpt_dsp_select_lpclock(cdev, lp, true);
> > > > 
> > > > Seems too much duplication.
> > > > 
> > > > 	struct catpt_stream_runtime *stream;
> > > > 
> > > > 	list_for_each_entry(stream, &cdev->stream_list, node) {
> > > > 		if (stream->prepared)
> > > > 			return catpt_dsp_select_lpclock(cdev, false, true);
> > > > 	}
> > > > 
> > > > 	return catpt_dsp_select_lpclock(cdev, true, true);
> > > > 
> > > > 
> > > > Better?
> > > 
> > > list_first_entry (part of list_for_each_entry) expects list to be non-empty.
> > > ->streal_list may be empty when invoking catpt_dsp_update_lpclock().
> > 
> > I didn't get this. Can you point out where is exactly problematic place?
> > 
> 
> list_for_each_entry makes use of list_first_entry when initializing 'pos'
> index variable.

Correct.

> Documentation for list_first_entry reads: "Note, that list
> is expected to be not empty"

Correct.

> so I'm validating list's status before moving
> on to the loop as stream_list may be empty when catpt_dsp_update_lpclock()
> gets called.

But here you missed the second part of the for-loop, i.e. exit conditional.

If your assumption (that list_for_each_*() is not empty-safe) is correct,
it would be disaster in global kernel source level.
Cezary Rojewski Aug. 19, 2020, 2:54 p.m. UTC | #6
On 2020-08-19 4:21 PM, Andy Shevchenko wrote:
> On Wed, Aug 19, 2020 at 03:46:30PM +0200, Cezary Rojewski wrote:
>> On 2020-08-18 1:50 PM, Andy Shevchenko wrote:
>>> On Mon, Aug 17, 2020 at 01:12:01PM +0200, Cezary Rojewski wrote:
>>>> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
>>>>> On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:

>>>>>> +	bool lp;
>>>>>> +
>>>>>> +	if (list_empty(&cdev->stream_list))
>>>>>> +		return catpt_dsp_select_lpclock(cdev, true, true);
>>>>>> +
>>>>>> +	lp = true;
>>>>>> +	list_for_each_entry(stream, &cdev->stream_list, node) {
>>>>>> +		if (stream->prepared) {
>>>>>> +			lp = false;
>>>>>> +			break;
>>>>>> +		}
>>>>>> +	}
>>>>>> +
>>>>>> +	return catpt_dsp_select_lpclock(cdev, lp, true);
>>>>>
>>>>> Seems too much duplication.
>>>>>
>>>>> 	struct catpt_stream_runtime *stream;
>>>>>
>>>>> 	list_for_each_entry(stream, &cdev->stream_list, node) {
>>>>> 		if (stream->prepared)
>>>>> 			return catpt_dsp_select_lpclock(cdev, false, true);
>>>>> 	}
>>>>>
>>>>> 	return catpt_dsp_select_lpclock(cdev, true, true);
>>>>>
>>>>>
>>>>> Better?
>>>>
>>>> list_first_entry (part of list_for_each_entry) expects list to be non-empty.
>>>> ->streal_list may be empty when invoking catpt_dsp_update_lpclock().
>>>
>>> I didn't get this. Can you point out where is exactly problematic place?
>>>
>>
>> list_for_each_entry makes use of list_first_entry when initializing 'pos'
>> index variable.
> 
> Correct.
> 
>> Documentation for list_first_entry reads: "Note, that list
>> is expected to be not empty"
> 
> Correct.
> 
>> so I'm validating list's status before moving
>> on to the loop as stream_list may be empty when catpt_dsp_update_lpclock()
>> gets called.
> 
> But here you missed the second part of the for-loop, i.e. exit conditional.
> 
> If your assumption (that list_for_each_*() is not empty-safe) is correct,
> it would be disaster in global kernel source level.
> 

We want no disasters here : )
safety-out. Ack.
Cezary Rojewski Aug. 20, 2020, 7:30 a.m. UTC | #7
On 2020-08-17 1:12 PM, Cezary Rojewski wrote:
> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
>> On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:

>>
>>> +#define CATPT_DMA_MAXBURST    0x3
>>
>> We have DMA engine definitions for that, please avoid magic numbers.
>>
> 
> As with most of the dma stuff, based on existing:
> /sound/soc/intel/common/sst-firmware.c SST_DSP_DMA_MAX_BURST
> 
> Ack.
> 

Actually, wasn't able to find anything _MAXBURST related in dmaengine.h. 
_BUSWIDTH_ have their constants defined there, true, but I'm already 
making use of these and this is dst/src_maxburst we're talking about. 
 From what I've seen in kernel sources, most usages are direct 
assignments: xxx_maxburst = Y;

>>> +    /* set D3 */
>>> +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
>>> +    udelay(50);
>>
>> Don't we have PCI core function for this?
>>
>> ...
>>
>>> +    /* set D0 */
>>> +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
>>> +    udelay(100);
>>
>> Ditto.
>>
>> ...
>>
>>> +    /* set D3 */
>>> +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
>>> +    udelay(50);
>>
>> Ditto.
>>
>> ...
>>
>>> +    /* set D0 */
>>> +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
>>
>> Ditto.
>>
> 
> Thanks to you now I know the correct answer: yes.
> Ack for all of these. Good advice Andy, again!

Similar situation occurred here. What we're dealing with is: instance of 
'struct platform_device' type, found on bus: acpi with PCI set as a 
parent device.

Scope found in DSDT:
	\_SB_.PCI0.ADSP
sysfs device path:
	/sys/devices/pci0000:00/INT3438:00
Within the latter _no_ standard utility files will be available e.g.: 
ability to dump PCI config space, bars and such.

I haven't found any functionality to extract "pci_companion" from a 
platform_device. What can be made use of is: PCI_D3hot and PCI_D0 enum 
constants, as pci_set_power_state() does not apply - expects struct 
pci_dev *.

Perhaps got misled by the function naming? catpt_updatel_xxx helpers: 
_xxx denotes specific ADSP device's mmio space. Almost all cases are 
covered by _pci and _shim.
Andy Shevchenko Aug. 20, 2020, 9 a.m. UTC | #8
On Thu, Aug 20, 2020 at 09:30:13AM +0200, Cezary Rojewski wrote:
> On 2020-08-17 1:12 PM, Cezary Rojewski wrote:
> > On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> > > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:

...

> > > > +#define CATPT_DMA_MAXBURST    0x3
> > > 
> > > We have DMA engine definitions for that, please avoid magic numbers.
> > 
> > As with most of the dma stuff, based on existing:
> > /sound/soc/intel/common/sst-firmware.c SST_DSP_DMA_MAX_BURST
> > 
> > Ack.
> 
> Actually, wasn't able to find anything _MAXBURST related in dmaengine.h.
> _BUSWIDTH_ have their constants defined there, true, but I'm already making
> use of these and this is dst/src_maxburst we're talking about. From what
> I've seen in kernel sources, most usages are direct assignments:
> xxx_maxburst = Y;

Okay, and how 0x3 bytes can be a burst? Does DMA engine support this?

...

> > > > +    /* set D3 */
> > > > +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
> > > > +    udelay(50);
> > > 
> > > Don't we have PCI core function for this?
> > > 
> > > > +    /* set D0 */
> > > > +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
> > > > +    udelay(100);
> > > 
> > > Ditto.
> > > 
> > > > +    /* set D3 */
> > > > +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
> > > > +    udelay(50);
> > > 
> > > Ditto.
> > > 
> > > > +    /* set D0 */
> > > > +    catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
> > > 
> > > Ditto.
> > 
> > Thanks to you now I know the correct answer: yes.
> > Ack for all of these. Good advice Andy, again!
> 
> Similar situation occurred here. What we're dealing with is: instance of
> 'struct platform_device' type, found on bus: acpi with PCI set as a parent
> device.
> 
> Scope found in DSDT:
> 	\_SB_.PCI0.ADSP
> sysfs device path:
> 	/sys/devices/pci0000:00/INT3438:00
> Within the latter _no_ standard utility files will be available e.g.:
> ability to dump PCI config space, bars and such.

I see. Can you dump DSDT somewhere? We are interested in
PSx/PRx/PSE/PSW/PSC/PRE/PRW/ON/OFF (x=0..3) methods.

> I haven't found any functionality to extract "pci_companion" from a
> platform_device. What can be made use of is: PCI_D3hot and PCI_D0 enum
> constants, as pci_set_power_state() does not apply - expects struct pci_dev
> *.
> 
> Perhaps got misled by the function naming? catpt_updatel_xxx helpers: _xxx
> denotes specific ADSP device's mmio space. Almost all cases are covered by
> _pci and _shim.

If we really need to use these commands directly, utilize at least definitions
from PCI core, e.g. PCI_D0, PCI_D3hot, PCI_PM_CTRL.
Cezary Rojewski Aug. 24, 2020, 4:33 p.m. UTC | #9
On 2020-08-20 11:00 AM, Andy Shevchenko wrote:
> On Thu, Aug 20, 2020 at 09:30:13AM +0200, Cezary Rojewski wrote:
>> On 2020-08-17 1:12 PM, Cezary Rojewski wrote:
>>> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
>>>> On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:

>>>>> +#define CATPT_DMA_MAXBURST    0x3
>>>>
>>>> We have DMA engine definitions for that, please avoid magic numbers.
>>>
>>> As with most of the dma stuff, based on existing:
>>> /sound/soc/intel/common/sst-firmware.c SST_DSP_DMA_MAX_BURST
>>>
>>> Ack.
>>
>> Actually, wasn't able to find anything _MAXBURST related in dmaengine.h.
>> _BUSWIDTH_ have their constants defined there, true, but I'm already making
>> use of these and this is dst/src_maxburst we're talking about. From what
>> I've seen in kernel sources, most usages are direct assignments:
>> xxx_maxburst = Y;
> 
> Okay, and how 0x3 bytes can be a burst? Does DMA engine support this?
> 

That's a very good question. Early this morning starting digging.

I believe this stems from attempt of sst_dsp_dma_get_channel() to serve 
two masters - MID DMAC and DW DMAC - at the same time.

Intel MID DMAC has been first introduced in version v2.6.36-rc1 and 
subsequently removed in v4.1-rc1 due to lack of any usage throughout its 
whole life. Reference:
	https://cateee.net/lkddb/web-lkddb/INTEL_MID_DMAC.html

files to look for in said kernel versions:
	drivers/dma/intel_mid_dma.c
	drivers/dma/intel_mid_regs.h

As MID DMAC is supposed to handle Langwell PCH which is coupled with 
Atom CPUs my guess is that /atom/ was supposed to make use of said DMAC 
at certain point in time which actually never happened. This is backed 
up by the following:

/haswell/ makes use of DW DMAC but that wasn't always the case - 
solution, which was first introduced in v3.15-rc1 have had the 
sst_dma_new() and friends (wrappers calling dw_dma_probe()) implemented 
only since v3.19-rc1. So, there was a period in time where both 
solutions were not using any DMAC whatsoever. As stated, in v3.19-rc1 
/haswell/ moved to recommended flow where DW DMAC gets involved in image 
loading. The same cannot be said about /atom/ -or- /baytrail/ for that 
matter and that is why I believe MID DMAC got removed, eventually.

Now, one can notice the following when viewing older versions of 
/haswell/ e.g.:
	https://elixir.bootlin.com/linux/v4.0.9/source/sound/soc/intel/sst-firmware.c#L224

notice the comment mentioning why sst_dsp_dma_get_channel() is somewhat 
ambiguous:
	The Intel MID DMA engine driver needs the slave config set but
	Synopsis DMA engine driver safely ignores the slave config

And thus:
	dma_cap_set(DMA_SLAVE, mask);
is flagged only because of MID DMAC while causing no harm to DW. As MID 
no longer exists, so should DMA_SLAVE flagging. This should answer your 
question from:

On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
 > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
 >> Implement dsp lifecycle functions such as core RESET and STALL,
 >> SRAM power control and LP clock selection. This also adds functions for
 >> handling transport over DW DMA controller.
 >
 >> +	dma_cap_set(DMA_SLAVE, mask);
 >
 > How this helps with mem2mem channel?
 >

In regard to maxburst, (SST_DSP_DMA_MAX_BURST 0x3) this is again mixture 
of MID & DW expectations - MID owns no conversion code, value assigned 
to src_/dst_maxburst is taken directly:

	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/intel_mid_dma.c#L495
vs
	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/dw/core.c#L965
notice convert_burst() (/drivers/dma/dw has seen several improvements 
and code isn't identical in newer kernels but functionality remains the 
same)

combine that knowledge with:
	enum dw_dma_msize {
		DW_DMA_MSIZE_1,
		DW_DMA_MSIZE_4,
		DW_DMA_MSIZE_8,
		DW_DMA_MSIZE_16,

from:
	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/dw/regs.h#L135

and one can safely assume maxburst is mistakenly set for DW - direct 
enum constant is used, ignoring the fact that is it DW code which 
converts provided value behind the scenes to appropriate one. Currently 
we end up with:
fls(0x3) -> 2;
2 - 2 = 0 (conversion method) -> DW_DMA_MSIZE_1

Conclusion: I believe DW_DMA_MSIZE_16 (0x3) is the correct one i.e.:
	src_maxburst = 16;
	dst_maxburst = 16;
should be present within catpt_dma_request_config_chan().

>>
>> Similar situation occurred here. What we're dealing with is: instance of
>> 'struct platform_device' type, found on bus: acpi with PCI set as a parent
>> device.
>>
>> Scope found in DSDT:
>> 	\_SB_.PCI0.ADSP
>> sysfs device path:
>> 	/sys/devices/pci0000:00/INT3438:00
>> Within the latter _no_ standard utility files will be available e.g.:
>> ability to dump PCI config space, bars and such.
> 
> I see. Can you dump DSDT somewhere? We are interested in
> PSx/PRx/PSE/PSW/PSC/PRE/PRW/ON/OFF (x=0..3) methods.
> 
>> I haven't found any functionality to extract "pci_companion" from a
>> platform_device. What can be made use of is: PCI_D3hot and PCI_D0 enum
>> constants, as pci_set_power_state() does not apply - expects struct pci_dev
>> *.
>>
>> Perhaps got misled by the function naming? catpt_updatel_xxx helpers: _xxx
>> denotes specific ADSP device's mmio space. Almost all cases are covered by
>> _pci and _shim.
> 
> If we really need to use these commands directly, utilize at least definitions
> from PCI core, e.g. PCI_D0, PCI_D3hot, PCI_PM_CTRL.
> 

Kudos to you, Andy, for taking time and debugging ACPI tables from our 
BDW machines.
Unfortunately explicit _updatel_pci for d3hot/d0 will have to remain as 
there is no other way to cause explicit power_state change for the device.

Another question though: PCI_PM_CTRL. In order for me to make use of 
this, "pm_cap" member would have to be declared for my device. As this 
is no struct pci_dev, catpt has currently no separate member for that 
purpose. I don't believe you want me to add that field into struct's 
declaration.
Second option is to define constant for pm_cap offset aka 0x80 within 
registers.h and then do the operations as follows:
	catpt_updatel_pci(cdev, CATPT_PM_CAP + PCI_PM_CTRL, ...)

However, in such case I won't be able to make use of current version of 
_updatel_pci() as definition of that macro allows me to skip prefix and 
type implicitly - PMCS (the rest is appended automatically).
Maybe let's leave it within registers.h altogether so I can actually 
keep using said macro?

Czarek
Andy Shevchenko Aug. 25, 2020, 1:16 p.m. UTC | #10
On Mon, Aug 24, 2020 at 06:33:17PM +0200, Cezary Rojewski wrote:
> On 2020-08-20 11:00 AM, Andy Shevchenko wrote:
> > On Thu, Aug 20, 2020 at 09:30:13AM +0200, Cezary Rojewski wrote:
> > > On 2020-08-17 1:12 PM, Cezary Rojewski wrote:
> > > > On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> > > > > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
> 
> > > > > > +#define CATPT_DMA_MAXBURST    0x3
> > > > > 
> > > > > We have DMA engine definitions for that, please avoid magic numbers.
> > > > 
> > > > As with most of the dma stuff, based on existing:
> > > > /sound/soc/intel/common/sst-firmware.c SST_DSP_DMA_MAX_BURST
> > > > 
> > > > Ack.
> > > 
> > > Actually, wasn't able to find anything _MAXBURST related in dmaengine.h.
> > > _BUSWIDTH_ have their constants defined there, true, but I'm already making
> > > use of these and this is dst/src_maxburst we're talking about. From what
> > > I've seen in kernel sources, most usages are direct assignments:
> > > xxx_maxburst = Y;
> > 
> > Okay, and how 0x3 bytes can be a burst? Does DMA engine support this?
> > 
> 
> That's a very good question. Early this morning starting digging.
> 
> I believe this stems from attempt of sst_dsp_dma_get_channel() to serve two
> masters - MID DMAC and DW DMAC - at the same time.
> 
> Intel MID DMAC has been first introduced in version v2.6.36-rc1 and
> subsequently removed in v4.1-rc1 due to lack of any usage throughout its
> whole life. Reference:
> 	https://cateee.net/lkddb/web-lkddb/INTEL_MID_DMAC.html
> 
> files to look for in said kernel versions:
> 	drivers/dma/intel_mid_dma.c
> 	drivers/dma/intel_mid_regs.h
> 
> As MID DMAC is supposed to handle Langwell PCH which is coupled with Atom
> CPUs my guess is that /atom/ was supposed to make use of said DMAC at
> certain point in time which actually never happened. This is backed up by
> the following:
> 
> /haswell/ makes use of DW DMAC but that wasn't always the case - solution,
> which was first introduced in v3.15-rc1 have had the sst_dma_new() and
> friends (wrappers calling dw_dma_probe()) implemented only since v3.19-rc1.
> So, there was a period in time where both solutions were not using any DMAC
> whatsoever. As stated, in v3.19-rc1 /haswell/ moved to recommended flow
> where DW DMAC gets involved in image loading. The same cannot be said about
> /atom/ -or- /baytrail/ for that matter and that is why I believe MID DMAC
> got removed, eventually.

Yes, I have removed it because it's a copy cat of Synopsys DesignWare. At Intel
at that time Vinod, who was developing that driver, had probably no idea that
he is reinventing the wheel. This theory is supported by (internal)
documentation for Medfield which has no words Synopsys, DesignWare in it.

> Now, one can notice the following when viewing older versions of /haswell/
> e.g.:
> 	https://elixir.bootlin.com/linux/v4.0.9/source/sound/soc/intel/sst-firmware.c#L224
> 
> notice the comment mentioning why sst_dsp_dma_get_channel() is somewhat
> ambiguous:
> 	The Intel MID DMA engine driver needs the slave config set but
> 	Synopsis DMA engine driver safely ignores the slave config
> 
> And thus:
> 	dma_cap_set(DMA_SLAVE, mask);
> is flagged only because of MID DMAC while causing no harm to DW. As MID no
> longer exists, so should DMA_SLAVE flagging. This should answer your
> question from:
> 
> On 2020-08-13 8:51 PM, Andy Shevchenko wrote:
> > On Wed, Aug 12, 2020 at 10:57:42PM +0200, Cezary Rojewski wrote:
> >> Implement dsp lifecycle functions such as core RESET and STALL,
> >> SRAM power control and LP clock selection. This also adds functions for
> >> handling transport over DW DMA controller.
> >
> >> +	dma_cap_set(DMA_SLAVE, mask);
> >
> > How this helps with mem2mem channel?
> >
> 
> In regard to maxburst, (SST_DSP_DMA_MAX_BURST 0x3) this is again mixture of
> MID & DW expectations - MID owns no conversion code, value assigned to
> src_/dst_maxburst is taken directly:
> 
> 	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/intel_mid_dma.c#L495
> vs
> 	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/dw/core.c#L965
> notice convert_burst() (/drivers/dma/dw has seen several improvements and
> code isn't identical in newer kernels but functionality remains the same)

Correct.

> combine that knowledge with:
> 	enum dw_dma_msize {
> 		DW_DMA_MSIZE_1,
> 		DW_DMA_MSIZE_4,
> 		DW_DMA_MSIZE_8,
> 		DW_DMA_MSIZE_16,
> 
> from:
> 	https://elixir.bootlin.com/linux/v4.0.9/source/drivers/dma/dw/regs.h#L135
> 
> and one can safely assume maxburst is mistakenly set for DW - direct enum
> constant is used, ignoring the fact that is it DW code which converts
> provided value behind the scenes to appropriate one. Currently we end up
> with:
> fls(0x3) -> 2;
> 2 - 2 = 0 (conversion method) -> DW_DMA_MSIZE_1
> 
> Conclusion: I believe DW_DMA_MSIZE_16 (0x3) is the correct one i.e.:
> 	src_maxburst = 16;
> 	dst_maxburst = 16;
> should be present within catpt_dma_request_config_chan().

Yes!

> > > Similar situation occurred here. What we're dealing with is: instance of
> > > 'struct platform_device' type, found on bus: acpi with PCI set as a parent
> > > device.
> > > 
> > > Scope found in DSDT:
> > > 	\_SB_.PCI0.ADSP
> > > sysfs device path:
> > > 	/sys/devices/pci0000:00/INT3438:00
> > > Within the latter _no_ standard utility files will be available e.g.:
> > > ability to dump PCI config space, bars and such.
> > 
> > I see. Can you dump DSDT somewhere? We are interested in
> > PSx/PRx/PSE/PSW/PSC/PRE/PRW/ON/OFF (x=0..3) methods.
> > 
> > > I haven't found any functionality to extract "pci_companion" from a
> > > platform_device. What can be made use of is: PCI_D3hot and PCI_D0 enum
> > > constants, as pci_set_power_state() does not apply - expects struct pci_dev
> > > *.
> > > 
> > > Perhaps got misled by the function naming? catpt_updatel_xxx helpers: _xxx
> > > denotes specific ADSP device's mmio space. Almost all cases are covered by
> > > _pci and _shim.
> > 
> > If we really need to use these commands directly, utilize at least definitions
> > from PCI core, e.g. PCI_D0, PCI_D3hot, PCI_PM_CTRL.
> > 
> 
> Kudos to you, Andy, for taking time and debugging ACPI tables from our BDW
> machines.
> Unfortunately explicit _updatel_pci for d3hot/d0 will have to remain as
> there is no other way to cause explicit power_state change for the device.

I see, thanks for checking this.

> Another question though: PCI_PM_CTRL. In order for me to make use of this,
> "pm_cap" member would have to be declared for my device. As this is no
> struct pci_dev, catpt has currently no separate member for that purpose. I
> don't believe you want me to add that field into struct's declaration.
> Second option is to define constant for pm_cap offset aka 0x80 within
> registers.h and then do the operations as follows:
> 	catpt_updatel_pci(cdev, CATPT_PM_CAP + PCI_PM_CTRL, ...)

> However, in such case I won't be able to make use of current version of
> _updatel_pci() as definition of that macro allows me to skip prefix and type
> implicitly - PMCS (the rest is appended automatically).
> Maybe let's leave it within registers.h altogether so I can actually keep
> using said macro?

Basically what you do with accessing PCI configuration space via these methods
(catpt_update_pci(), etc) is something repetitive / similar to what xHCI DbC
support code does. I recommend to spend some time to look for similarities here
(catpt) and there (PCI core, xHCI DbC, etc) and, if we were lucky, derive
common helpers for traverse the capability list in more generalized way.
Andy Shevchenko Aug. 25, 2020, 1:23 p.m. UTC | #11
On Tue, Aug 25, 2020 at 04:16:15PM +0300, Andy Shevchenko wrote:
> On Mon, Aug 24, 2020 at 06:33:17PM +0200, Cezary Rojewski wrote:
> > On 2020-08-20 11:00 AM, Andy Shevchenko wrote:

...

> > Another question though: PCI_PM_CTRL. In order for me to make use of this,
> > "pm_cap" member would have to be declared for my device. As this is no
> > struct pci_dev, catpt has currently no separate member for that purpose. I
> > don't believe you want me to add that field into struct's declaration.
> > Second option is to define constant for pm_cap offset aka 0x80 within
> > registers.h and then do the operations as follows:
> > 	catpt_updatel_pci(cdev, CATPT_PM_CAP + PCI_PM_CTRL, ...)
> 
> > However, in such case I won't be able to make use of current version of
> > _updatel_pci() as definition of that macro allows me to skip prefix and type
> > implicitly - PMCS (the rest is appended automatically).
> > Maybe let's leave it within registers.h altogether so I can actually keep
> > using said macro?
> 
> Basically what you do with accessing PCI configuration space via these methods
> (catpt_update_pci(), etc) is something repetitive / similar to what xHCI DbC
> support code does. I recommend to spend some time to look for similarities here
> (catpt) and there (PCI core, xHCI DbC, etc) and, if we were lucky, derive
> common helpers for traverse the capability list in more generalized way.

Throwing the idea loud: perhaps we may have something like regmap-pci.c to
access PCI configuration space and make regmap API to take care of which IO
accessors (and locking) will be used.
Cezary Rojewski Aug. 27, 2020, 10:06 a.m. UTC | #12
On 2020-08-25 3:16 PM, Andy Shevchenko wrote:
> On Mon, Aug 24, 2020 at 06:33:17PM +0200, Cezary Rojewski wrote:
>> On 2020-08-20 11:00 AM, Andy Shevchenko wrote:

...

>> Another question though: PCI_PM_CTRL. In order for me to make use of this,
>> "pm_cap" member would have to be declared for my device. As this is no
>> struct pci_dev, catpt has currently no separate member for that purpose. I
>> don't believe you want me to add that field into struct's declaration.
>> Second option is to define constant for pm_cap offset aka 0x80 within
>> registers.h and then do the operations as follows:
>> 	catpt_updatel_pci(cdev, CATPT_PM_CAP + PCI_PM_CTRL, ...)
> 
>> However, in such case I won't be able to make use of current version of
>> _updatel_pci() as definition of that macro allows me to skip prefix and type
>> implicitly - PMCS (the rest is appended automatically).
>> Maybe let's leave it within registers.h altogether so I can actually keep
>> using said macro?
> 
> Basically what you do with accessing PCI configuration space via these methods
> (catpt_update_pci(), etc) is something repetitive / similar to what xHCI DbC
> support code does. I recommend to spend some time to look for similarities here
> (catpt) and there (PCI core, xHCI DbC, etc) and, if we were lucky, derive
> common helpers for traverse the capability list in more generalized way.
> 

I wouldn't call direct-access a repetitive procedure, i.e. had procedure 
for enumerating PCI capabilities list been implemented individually by 
every PCI device type, then one can describe that as repetitiveness. 
Here, we are dealing with no procedure at all, just a writel & readl.

About xHCI, I believe you meant: xhci_find_next_ext_cap()
	https://elixir.bootlin.com/linux/latest/source/drivers/usb/host/xhci-ext-caps.h#L97

in case of PCI that's: pci_find_next_capability(), __pci_find_next_cap() 
and friends. pci_find_next_capability() is pci_dev dependent while most 
of the rest pci_bus instead. We fail both dependencies in catpt case.

xhci_find_next_ext_cap search method seems xHCI-specific, notice the 
0x10 offset for HCCPARAMS1 and then the left-shift-by-2. PCI doesn't do 
that when enumerating capabilities, instead it checks Capabilities 
List-bit within Status reg and then begins iterating given the start pos 
- Capability Pointer, usually 0x34. Abstracting these (if even possible) 
would end up with 80% code gluing two different worlds with 20% left 
doing the actual job. Fact that those two are separated increases code 
readability.

While catpt is of platform_device type located on acpi bus, beneath 
there's a (incomplete?) description of PCI device.

PCI config
catpt_acpi_probe00000000: 9cb68086 00100006 04010003 00000000
catpt_acpi_probe00000010: fe000000 fe100000 00000000 00000000
catpt_acpi_probe00000020: 00000000 00000000 00000000 00000000
catpt_acpi_probe00000030: 00000000 00000080 00000000 00000100

PCI base + 0x80
catpt_acpi_probe00000000: 40030001 0000000b 00000000 00000000
catpt_acpi_probe00000010: 00000000 00000000 00000000 00000000
catpt_acpi_probe00000020: fffffffd 00000000 80000fff 00000000
catpt_acpi_probe00000030: 00000000 00000000 00000000 00000000

Capabilities List-bit is set, start pos from Capabilitiy Pointer equals 
0x80. What we have here is singular list of capabilities - PM as the 
only element. Following is the important DWORD (_PM_CTRL) - 0xb tells us 
that device is currently in D3hot. For LPT/WPT ADSP basically all other 
PM bits are hardwired to 0 or not supported.

So, quite frankly, had the BIOS offered correct ADSP device description, 
we wouldn't be dealing with ACPI device/ACPI bus at all. This has been 
corrected from SKL+ ADSP onward. To answer the immediate question: no, 
device of id 0x9cb6 won't be present within /sys/bus/pci/devices/ (cat 
'./device' for id for every entry). Even converted catpt driver from 
acpi to pci just to make sure.

I don't mind adding new constant within register.h for transparency:
	#define CATPT_PCI_PMCAPID 0x80
	#define CATPT_PCI_PMCS (CATPT_PCI_PMCAPID + PCI_PM_CTRL)

Current status for PM catpt_updatel_pci:
	catpt_updatel_pci(cdev, PMCS, PCI_PM_CTRL_STATE_MASK, PCI_D3hot)
	catpt_updatel_pci(cdev, PMCS, PCI_PM_CTRL_STATE_MASK, PCI_D0)
which looks very good to me.
diff mbox series

Patch

diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c
new file mode 100644
index 000000000000..1474e2e11672
--- /dev/null
+++ b/sound/soc/intel/catpt/dsp.c
@@ -0,0 +1,497 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright(c) 2020 Intel Corporation. All rights reserved.
+//
+// Author: Cezary Rojewski <cezary.rojewski@intel.com>
+//
+
+#include <linux/dma-mapping.h>
+#include <linux/acpi_dma.h>
+#include <linux/firmware.h>
+#include "core.h"
+#include "messages.h"
+#include "registers.h"
+#include "trace.h"
+
+static void catpt_dma_transfer_complete(void *arg)
+{
+	struct catpt_dev *cdev = arg;
+
+	dev_dbg(cdev->dev, "%s\n", __func__);
+}
+
+static bool catpt_dma_filter(struct dma_chan *chan, void *param)
+{
+	return chan->device->dev == (struct device *)param;
+}
+
+#define CATPT_DMA_DEVID		1 /* dma engine used */
+#define CATPT_DMA_MAXBURST	0x3
+#define CATPT_DMA_DSP_ADDR_MASK	0xFFF00000
+
+struct dma_chan *catpt_dma_request_config_chan(struct catpt_dev *cdev)
+{
+	struct dma_slave_config config;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+	int ret;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	chan = dma_request_channel(mask, catpt_dma_filter, cdev->dev);
+	if (!chan) {
+		dev_err(cdev->dev, "request channel failed\n");
+		dump_stack();
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	memset(&config, 0, sizeof(config));
+	config.direction = DMA_MEM_TO_DEV;
+	config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	config.src_maxburst = CATPT_DMA_MAXBURST;
+	config.dst_maxburst = CATPT_DMA_MAXBURST;
+
+	ret = dmaengine_slave_config(chan, &config);
+	if (ret) {
+		dev_err(cdev->dev, "slave config failed: %d\n", ret);
+		dma_release_channel(chan);
+		return ERR_PTR(ret);
+	}
+
+	return chan;
+}
+
+static int catpt_dma_memcpy(struct catpt_dev *cdev, struct dma_chan *chan,
+			    dma_addr_t dst_addr, dma_addr_t src_addr,
+			    size_t size)
+{
+	struct dma_async_tx_descriptor *desc;
+	enum dma_status status;
+
+	desc = dmaengine_prep_dma_memcpy(chan, dst_addr, src_addr, size,
+					 DMA_CTRL_ACK);
+	if (!desc) {
+		dev_err(cdev->dev, "prep dma memcpy failed\n");
+		return -EIO;
+	}
+
+	/* enable demand mode for dma channel */
+	catpt_updatel_shim(cdev, HMDC,
+			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id),
+			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id));
+	desc->callback = catpt_dma_transfer_complete;
+	desc->callback_param = cdev;
+	dmaengine_submit(desc);
+
+	status = dma_wait_for_async_tx(desc);
+	catpt_updatel_shim(cdev, HMDC,
+			   CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);
+
+	return (status == DMA_COMPLETE) ? 0 : -EPROTO;
+}
+
+int catpt_dma_memcpy_todsp(struct catpt_dev *cdev, struct dma_chan *chan,
+			   dma_addr_t dst_addr, dma_addr_t src_addr,
+			   size_t size)
+{
+	return catpt_dma_memcpy(cdev, chan, dst_addr | CATPT_DMA_DSP_ADDR_MASK,
+				src_addr, size);
+}
+
+int catpt_dma_memcpy_fromdsp(struct catpt_dev *cdev, struct dma_chan *chan,
+			     dma_addr_t dst_addr, dma_addr_t src_addr,
+			     size_t size)
+{
+	return catpt_dma_memcpy(cdev, chan, dst_addr,
+				src_addr | CATPT_DMA_DSP_ADDR_MASK, size);
+}
+
+int catpt_dmac_probe(struct catpt_dev *cdev)
+{
+	struct dw_dma_chip *dmac;
+	struct resource res;
+	int ret;
+
+	dmac = devm_kzalloc(cdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	memset(&res, 0, sizeof(res));
+	res.start = cdev->lpe_base +
+		    cdev->spec->host_dma_offset[CATPT_DMA_DEVID];
+	res.end = res.start + (CATPT_DMA_REGS_SIZE - 1);
+	res.flags = IORESOURCE_MEM;
+	dmac->dev = cdev->dev;
+	dmac->irq = cdev->irq;
+
+	dmac->regs = devm_ioremap_resource(cdev->dev, &res);
+	if (IS_ERR(dmac->regs))
+		return PTR_ERR(dmac->regs);
+
+	ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31));
+	if (ret < 0)
+		return ret;
+	/*
+	 * Caller is responsible for putting device in D0 to allow
+	 * for I/O and memory access before probing DW
+	 */
+	ret = dw_dma_probe(dmac);
+	if (ret < 0)
+		return ret;
+
+	cdev->dmac = dmac;
+	return 0;
+}
+
+void catpt_dmac_remove(struct catpt_dev *cdev)
+{
+	/*
+	 * As do_dma_remove() juggles with pm_runtime_get_xxx() and
+	 * pm_runtime_put_xxx() while both ADSP and DW 'devices' are part of
+	 * the same module, caller makes sure pm_runtime_disable() is invoked
+	 * before removing DW to prevent postmortem resume and suspend
+	 */
+	dw_dma_remove(cdev->dmac);
+}
+
+static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram,
+				  unsigned long mask, unsigned long new)
+{
+	unsigned long old;
+	u32 off = sram->start;
+	u32 b = __ffs(mask);
+
+	old = catpt_readl_pci(cdev, VDRTCTL0) & mask;
+	dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx",
+		mask, old, new);
+
+	if (old == new)
+		return;
+
+	catpt_updatel_pci(cdev, VDRTCTL0, mask, new);
+	udelay(60);
+
+	/*
+	 * dummy read as the very first access after block enable
+	 * to prevent byte loss in future operations
+	 */
+	for_each_clear_bit_from(b, &new, fls(mask)) {
+		u8 buf[4];
+
+		/* newly enabled: new bit=0 while old bit=1 */
+		if (test_bit(b, &old)) {
+			dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n",
+				(b - __ffs(mask)), off);
+			memcpy_fromio(buf, cdev->lpe_ba + off, sizeof(buf));
+		}
+		off += CATPT_MEMBLOCK_SIZE;
+	}
+}
+
+void catpt_dsp_update_srampge(struct catpt_dev *cdev, struct resource *sram,
+			      unsigned long mask)
+{
+	struct resource *res;
+	unsigned long new = 0;
+
+	/* flag all busy blocks */
+	for (res = sram->child; res; res = res->sibling) {
+		u32 h, l;
+
+		h = (res->end - sram->start) / CATPT_MEMBLOCK_SIZE;
+		l = (res->start - sram->start) / CATPT_MEMBLOCK_SIZE;
+		new |= GENMASK(h, l);
+	}
+
+	/* offset value given mask's start and invert it as ON=b0 */
+	new <<= __ffs(mask);
+	new = ~(new) & mask;
+
+	/* disable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE, 0);
+
+	catpt_dsp_set_srampge(cdev, sram, mask, new);
+
+	/* enable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE,
+			  CATPT_VDRTCTL2_DCLCGE);
+}
+
+int catpt_dsp_stall(struct catpt_dev *cdev, bool stall)
+{
+	u32 reg, val;
+
+	val = stall ? CATPT_CS_STALL : 0;
+	catpt_updatel_shim(cdev, CS1, CATPT_CS_STALL, val);
+
+	return catpt_readl_poll_shim(cdev, CS1,
+				     reg, (reg & CATPT_CS_STALL) == val,
+				     500, 10000);
+}
+
+static int catpt_dsp_reset(struct catpt_dev *cdev, bool reset)
+{
+	u32 reg, val;
+
+	val = reset ? CATPT_CS_RST : 0;
+	catpt_updatel_shim(cdev, CS1, CATPT_CS_RST, val);
+
+	return catpt_readl_poll_shim(cdev, CS1,
+				     reg, (reg & CATPT_CS_RST) == val,
+				     500, 10000);
+}
+
+void lpt_dsp_pll_shutdown(struct catpt_dev *cdev, bool enable)
+{
+	u32 val;
+
+	val = enable ? LPT_VDRTCTL0_APLLSE : 0;
+	catpt_updatel_pci(cdev, VDRTCTL0, LPT_VDRTCTL0_APLLSE, val);
+}
+
+void wpt_dsp_pll_shutdown(struct catpt_dev *cdev, bool enable)
+{
+	u32 val;
+
+	val = enable ? WPT_VDRTCTL2_APLLSE : 0;
+	catpt_updatel_pci(cdev, VDRTCTL2, WPT_VDRTCTL2_APLLSE, val);
+}
+
+static int catpt_dsp_select_lpclock(struct catpt_dev *cdev, bool lp, bool waiti)
+{
+	u32 mask, reg, val;
+	int ret;
+
+	mutex_lock(&cdev->clk_mutex);
+
+	val = lp ? CATPT_CS_LPCS : 0;
+	reg = catpt_readl_shim(cdev, CS1) & CATPT_CS_LPCS;
+	dev_dbg(cdev->dev, "LPCS [0x%08lx] 0x%08x -> 0x%08x",
+		CATPT_CS_LPCS, reg, val);
+
+	if (reg == val) {
+		mutex_unlock(&cdev->clk_mutex);
+		return 0;
+	}
+
+	if (waiti) {
+		/* wait for DSP to signal WAIT state */
+		ret = catpt_readl_poll_shim(cdev, ISD,
+					    reg, (reg & CATPT_ISD_DCPWM),
+					    500, 10000);
+		if (ret < 0) {
+			dev_warn(cdev->dev, "await WAITI timeout\n");
+			mutex_unlock(&cdev->clk_mutex);
+			return ret;
+		}
+	}
+
+	ret = catpt_readl_poll_shim(cdev, CLKCTL,
+				    reg, !(reg & CATPT_CLKCTL_CFCIP),
+				    500, 10000);
+	if (ret < 0)
+		dev_warn(cdev->dev, "clock change still in progress\n");
+
+	/* default to DSP core & audio fabric high clock */
+	val |= CATPT_CS_DCS_HIGH;
+	mask = CATPT_CS_LPCS | CATPT_CS_DCS;
+	catpt_updatel_shim(cdev, CS1, mask, val);
+
+	ret = catpt_readl_poll_shim(cdev, CLKCTL,
+				    reg, !(reg & CATPT_CLKCTL_CFCIP),
+				    500, 10000);
+	if (ret < 0)
+		dev_warn(cdev->dev, "clock change still in progress\n");
+
+	/* update PLL accordingly */
+	cdev->spec->pll_shutdown(cdev, lp);
+
+	mutex_unlock(&cdev->clk_mutex);
+	return 0;
+}
+
+int catpt_dsp_update_lpclock(struct catpt_dev *cdev)
+{
+	struct catpt_stream_runtime *stream;
+	bool lp;
+
+	if (list_empty(&cdev->stream_list))
+		return catpt_dsp_select_lpclock(cdev, true, true);
+
+	lp = true;
+	list_for_each_entry(stream, &cdev->stream_list, node) {
+		if (stream->prepared) {
+			lp = false;
+			break;
+		}
+	}
+
+	return catpt_dsp_select_lpclock(cdev, lp, true);
+}
+
+/* bring registers to their defaults as HW won't reset itself */
+static void catpt_dsp_set_regs_defaults(struct catpt_dev *cdev)
+{
+	int i;
+
+	catpt_writel_shim(cdev, CS1, CATPT_CS_DEFAULT);
+	catpt_writel_shim(cdev, ISC, CATPT_ISC_DEFAULT);
+	catpt_writel_shim(cdev, ISD, CATPT_ISD_DEFAULT);
+	catpt_writel_shim(cdev, IMC, CATPT_IMC_DEFAULT);
+	catpt_writel_shim(cdev, IMD, CATPT_IMD_DEFAULT);
+	catpt_writel_shim(cdev, IPCC, CATPT_IPCC_DEFAULT);
+	catpt_writel_shim(cdev, IPCD, CATPT_IPCD_DEFAULT);
+	catpt_writel_shim(cdev, CLKCTL, CATPT_CLKCTL_DEFAULT);
+	catpt_writel_shim(cdev, CS2, CATPT_CS2_DEFAULT);
+	catpt_writel_shim(cdev, LTRC, CATPT_LTRC_DEFAULT);
+	catpt_writel_shim(cdev, HMDC, CATPT_HMDC_DEFAULT);
+
+	for (i = 0; i < CATPT_SSP_COUNT; i++) {
+		catpt_writel_ssp(cdev, i, SSC0, CATPT_SSP_SSC0_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSC1, CATPT_SSP_SSC1_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSS, CATPT_SSP_SSS_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSIT, CATPT_SSP_SSIT_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSD, CATPT_SSP_SSD_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSTO, CATPT_SSP_SSTO_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSPSP, CATPT_SSP_SSPSP_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSTSA, CATPT_SSP_SSTSA_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSRSA, CATPT_SSP_SSRSA_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSTSS, CATPT_SSP_SSTSS_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSC2, CATPT_SSP_SSC2_DEFAULT);
+		catpt_writel_ssp(cdev, i, SSPSP2, CATPT_SSP_SSPSP2_DEFAULT);
+	}
+}
+
+int lpt_dsp_power_down(struct catpt_dev *cdev)
+{
+	catpt_dsp_reset(cdev, true);
+
+	/* set 24Mhz clock for both SSPs */
+	catpt_updatel_shim(cdev, CS1, CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1),
+			   CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1));
+	catpt_dsp_select_lpclock(cdev, true, false);
+
+	/* DRAM power gating all */
+	catpt_dsp_set_srampge(cdev, &cdev->dram, cdev->spec->dram_mask,
+			      cdev->spec->dram_mask);
+	catpt_dsp_set_srampge(cdev, &cdev->iram, cdev->spec->iram_mask,
+			      cdev->spec->iram_mask);
+
+	/* set D3 */
+	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
+	udelay(50);
+
+	return 0;
+}
+
+int lpt_dsp_power_up(struct catpt_dev *cdev)
+{
+	/* SRAM power gating none */
+	catpt_dsp_set_srampge(cdev, &cdev->dram, cdev->spec->dram_mask, 0);
+	catpt_dsp_set_srampge(cdev, &cdev->iram, cdev->spec->iram_mask, 0);
+
+	/* set D0 */
+	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
+	udelay(100);
+
+	catpt_dsp_select_lpclock(cdev, false, false);
+	catpt_updatel_shim(cdev, CS1,
+			CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1),
+			CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1));
+	udelay(50);
+
+	catpt_dsp_reset(cdev, false);
+	/* generate int deassert msg to fix inversed int logic */
+	catpt_updatel_shim(cdev, IMC, CATPT_IMC_IPCDB | CATPT_IMC_IPCCD, 0);
+
+	return 0;
+}
+
+int wpt_dsp_power_down(struct catpt_dev *cdev)
+{
+	u32 mask, val;
+
+	/* disable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE, 0);
+
+	catpt_dsp_reset(cdev, true);
+	/* set 24Mhz clock for both SSPs */
+	catpt_updatel_shim(cdev, CS1, CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1),
+			   CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1));
+	catpt_dsp_select_lpclock(cdev, true, false);
+	/* disable MCLK */
+	catpt_updatel_shim(cdev, CLKCTL, CATPT_CLKCTL_SMOS, 0);
+
+	catpt_dsp_set_regs_defaults(cdev);
+
+	/* switch clock gating */
+	mask = CATPT_VDRTCTL2_CGEALL & (~CATPT_VDRTCTL2_DCLCGE);
+	val = mask & (~CATPT_VDRTCTL2_DTCGE);
+	catpt_updatel_pci(cdev, VDRTCTL2, mask, val);
+	/* enable DTCGE separatelly */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DTCGE,
+			  CATPT_VDRTCTL2_DTCGE);
+
+	/* SRAM power gating all */
+	catpt_dsp_set_srampge(cdev, &cdev->dram, cdev->spec->dram_mask,
+			      cdev->spec->dram_mask);
+	catpt_dsp_set_srampge(cdev, &cdev->iram, cdev->spec->iram_mask,
+			      cdev->spec->iram_mask);
+	mask = WPT_VDRTCTL0_D3SRAMPGD | WPT_VDRTCTL0_D3PGD;
+	catpt_updatel_pci(cdev, VDRTCTL0, mask, WPT_VDRTCTL0_D3PGD);
+
+	/* set D3 */
+	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, CATPT_PMCS_PS_D3HOT);
+	udelay(50);
+
+	/* enable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE,
+			  CATPT_VDRTCTL2_DCLCGE);
+	udelay(50);
+
+	return 0;
+}
+
+int wpt_dsp_power_up(struct catpt_dev *cdev)
+{
+	u32 mask, val;
+
+	/* disable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE, 0);
+
+	/* switch clock gating */
+	mask = CATPT_VDRTCTL2_CGEALL & (~CATPT_VDRTCTL2_DCLCGE);
+	val = mask & (~CATPT_VDRTCTL2_DTCGE);
+	catpt_updatel_pci(cdev, VDRTCTL2, mask, val);
+
+	/* set D0 */
+	catpt_updatel_pci(cdev, PMCS, CATPT_PMCS_PS, 0);
+
+	/* SRAM power gating none */
+	mask = WPT_VDRTCTL0_D3SRAMPGD | WPT_VDRTCTL0_D3PGD;
+	catpt_updatel_pci(cdev, VDRTCTL0, mask, mask);
+	catpt_dsp_set_srampge(cdev, &cdev->dram, cdev->spec->dram_mask, 0);
+	catpt_dsp_set_srampge(cdev, &cdev->iram, cdev->spec->iram_mask, 0);
+
+	catpt_dsp_set_regs_defaults(cdev);
+
+	/* restore MCLK */
+	catpt_updatel_shim(cdev, CLKCTL, CATPT_CLKCTL_SMOS, CATPT_CLKCTL_SMOS);
+	catpt_dsp_select_lpclock(cdev, false, false);
+	/* set 24Mhz clock for both SSPs */
+	catpt_updatel_shim(cdev, CS1, CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1),
+			   CATPT_CS_SBCS(0) | CATPT_CS_SBCS(1));
+	catpt_dsp_reset(cdev, false);
+
+	/* enable core clock gating */
+	catpt_updatel_pci(cdev, VDRTCTL2, CATPT_VDRTCTL2_DCLCGE,
+			  CATPT_VDRTCTL2_DCLCGE);
+
+	/* generate int deassert msg to fix inversed int logic */
+	catpt_updatel_shim(cdev, IMC, CATPT_IMC_IPCDB | CATPT_IMC_IPCCD, 0);
+
+	return 0;
+}