diff mbox

[7/9] drm/radeon: add VCE 1.0 support v4

Message ID 1431374515-2042-7-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König May 11, 2015, 8:01 p.m. UTC
From: Christian König <christian.koenig@amd.com>

Initial support for VCE 1.0 using newest firmware.

v2: rebased
v3: fix for TN
v4: fix FW size calculation

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/ni.c          |  46 ++++++++++++
 drivers/gpu/drm/radeon/radeon.h      |   1 +
 drivers/gpu/drm/radeon/radeon_asic.c |  17 +++++
 drivers/gpu/drm/radeon/radeon_asic.h |   3 +
 drivers/gpu/drm/radeon/radeon_vce.c  |  23 +++++-
 drivers/gpu/drm/radeon/si.c          |  46 ++++++++++++
 drivers/gpu/drm/radeon/sid.h         |   1 +
 drivers/gpu/drm/radeon/vce_v1_0.c    | 140 +++++++++++++++++++++++++++++++++++
 8 files changed, 274 insertions(+), 3 deletions(-)

Comments

Christian König Aug. 12, 2015, 9:16 p.m. UTC | #1
On 13.08.2015 08:36, Lucas Stach wrote:
> Am Donnerstag, den 13.08.2015, 15:18 +0900 schrieb Michel Dänzer:
>> On 13.08.2015 15:03, Lucas Stach wrote:
>>> Hi Christian,
>>>
>>> this commit is causing a boot regression with v4.2-rcX on my Richland
>>> APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down
>>> where exactly it is going wrong, but I bisected it down to this single
>>> commit.
>>>
>>> I don't have the VCE firmware installed on this system, so from a quick
>>> look at the code I would expect it to drop out pretty early and just
>>> leave the VCE unconfigured, but otherwise keep things working as before.
>>> This is unfortunately not the case.
>> If the radeon driver is built into the kernel (or loaded from the
>> initrd?), the attempt to load the firmware might take a long time to
>> time out.
>>
> Gah. Thanks, I was too impatient to wait for the firmware loading to
> time out. In fact this is a standard Fedora kernel config, so radeon is
> a module, but it is built into the initrd.
>
> So it's not really radeon's fault, but one more iteration of the fact
> that anything involving firmware loading is just horribly inconvenient.
> Especially if it's firmware for an optional component.

Yeah, exactly. The timeout for loading the firmware is really long on
both the standard Fedora and Ubuntu kernels. Not sure if that's the
default or just their configuration.

While it doesn't have the highest priority for us, I usually still test
once or twice during the upstreaming process whether running without the
firmware still works. So if you really find that the box doesn't work
without the firmware, leave me a note and I will fix it.

Best regards,
Christian.

>
> Regards,
> Lucas
>
Lucas Stach Aug. 13, 2015, 6:03 a.m. UTC | #2
Hi Christian,

this commit is causing a boot regression with v4.2-rcX on my Richland
APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down
where exactly it is going wrong, but I bisected it down to this single
commit.

I don't have the VCE firmware installed on this system, so from a quick
look at the code I would expect it to drop out pretty early and just
leave the VCE unconfigured, but otherwise keep things working as before.
This is unfortunately not the case.

Maybe you can take a look at what's wrong here. We are already pretty
late in the release cycle and I'm unsure if I can find any more time to
look into the issue before the final release.

Thanks,
Lucas

Am Montag, den 11.05.2015, 22:01 +0200 schrieb Christian König:
> From: Christian König <christian.koenig@amd.com>
> 
> Initial support for VCE 1.0 using newest firmware.
> 
> v2: rebased
> v3: fix for TN
> v4: fix FW size calculation
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/radeon/ni.c          |  46 ++++++++++++
>  drivers/gpu/drm/radeon/radeon.h      |   1 +
>  drivers/gpu/drm/radeon/radeon_asic.c |  17 +++++
>  drivers/gpu/drm/radeon/radeon_asic.h |   3 +
>  drivers/gpu/drm/radeon/radeon_vce.c  |  23 +++++-
>  drivers/gpu/drm/radeon/si.c          |  46 ++++++++++++
>  drivers/gpu/drm/radeon/sid.h         |   1 +
>  drivers/gpu/drm/radeon/vce_v1_0.c    | 140 +++++++++++++++++++++++++++++++++++
>  8 files changed, 274 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index 32f5f03..faffca3 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -2040,6 +2040,25 @@ static int cayman_startup(struct radeon_device *rdev)
>  	if (r)
>  		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
>  
> +	if (rdev->family == CHIP_ARUBA) {
> +		r = radeon_vce_resume(rdev);
> +		if (!r)
> +			r = vce_v1_0_resume(rdev);
> +
> +		if (!r)
> +			r = radeon_fence_driver_start_ring(rdev,
> +							   TN_RING_TYPE_VCE1_INDEX);
> +		if (!r)
> +			r = radeon_fence_driver_start_ring(rdev,
> +							   TN_RING_TYPE_VCE2_INDEX);
> +
> +		if (r) {
> +			dev_err(rdev->dev, "VCE init error (%d).\n", r);
> +			rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
> +			rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
> +		}
> +	}
> +
>  	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
>  	if (r) {
>  		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
> @@ -2117,6 +2136,19 @@ static int cayman_startup(struct radeon_device *rdev)
>  			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
>  	}
>  
> +	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
> +	if (ring->ring_size)
> +		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
> +
> +	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
> +	if (ring->ring_size)
> +		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
> +
> +	if (!r)
> +		r = vce_v1_0_init(rdev);
> +	else if (r != -ENOENT)
> +		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
> +
>  	r = radeon_ib_pool_init(rdev);
>  	if (r) {
>  		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
> @@ -2272,6 +2304,19 @@ int cayman_init(struct radeon_device *rdev)
>  		r600_ring_init(rdev, ring, 4096);
>  	}
>  
> +	if (rdev->family == CHIP_ARUBA) {
> +		r = radeon_vce_init(rdev);
> +		if (!r) {
> +			ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
> +			ring->ring_obj = NULL;
> +			r600_ring_init(rdev, ring, 4096);
> +
> +			ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
> +			ring->ring_obj = NULL;
> +			r600_ring_init(rdev, ring, 4096);
> +		}
> +	}
> +
>  	rdev->ih.ring_obj = NULL;
>  	r600_ih_ring_init(rdev, 64 * 1024);
>  
> @@ -2325,6 +2370,7 @@ void cayman_fini(struct radeon_device *rdev)
>  	radeon_irq_kms_fini(rdev);
>  	uvd_v1_0_fini(rdev);
>  	radeon_uvd_fini(rdev);
> +	radeon_vce_fini(rdev);
>  	cayman_pcie_gart_fini(rdev);
>  	r600_vram_scratch_fini(rdev);
>  	radeon_gem_fini(rdev);
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 38603b1..59480fd 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1719,6 +1719,7 @@ struct radeon_vce {
>  	struct drm_file		*filp[RADEON_MAX_VCE_HANDLES];
>  	unsigned		img_size[RADEON_MAX_VCE_HANDLES];
>  	struct delayed_work	idle_work;
> +	uint32_t		keyselect;
>  };
>  
>  int radeon_vce_init(struct radeon_device *rdev);
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
> index b37b22bd..eaf909e 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.c
> +++ b/drivers/gpu/drm/radeon/radeon_asic.c
> @@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = {
>  	},
>  };
>  
> +static struct radeon_asic_ring trinity_vce_ring = {
> +	.ib_execute = &radeon_vce_ib_execute,
> +	.emit_fence = &radeon_vce_fence_emit,
> +	.emit_semaphore = &radeon_vce_semaphore_emit,
> +	.cs_parse = &radeon_vce_cs_parse,
> +	.ring_test = &radeon_vce_ring_test,
> +	.ib_test = &radeon_vce_ib_test,
> +	.is_lockup = &radeon_ring_test_lockup,
> +	.get_rptr = &vce_v1_0_get_rptr,
> +	.get_wptr = &vce_v1_0_get_wptr,
> +	.set_wptr = &vce_v1_0_set_wptr,
> +};
> +
>  static struct radeon_asic trinity_asic = {
>  	.init = &cayman_init,
>  	.fini = &cayman_fini,
> @@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = {
>  		[R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
>  		[CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
>  		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
> +		[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
> +		[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
>  	},
>  	.irq = {
>  		.set = &evergreen_irq_set,
> @@ -1930,6 +1945,8 @@ static struct radeon_asic si_asic = {
>  		[R600_RING_TYPE_DMA_INDEX] = &si_dma_ring,
>  		[CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring,
>  		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
> +		[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
> +		[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
>  	},
>  	.irq = {
>  		.set = &si_irq_set,
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
> index 629f291..e0aa332 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -972,6 +972,9 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
>  			   struct radeon_ring *ring);
>  void vce_v1_0_set_wptr(struct radeon_device *rdev,
>  		       struct radeon_ring *ring);
> +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data);
> +unsigned vce_v1_0_bo_size(struct radeon_device *rdev);
> +int vce_v1_0_resume(struct radeon_device *rdev);
>  int vce_v1_0_init(struct radeon_device *rdev);
>  int vce_v1_0_start(struct radeon_device *rdev);
>  
> diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
> index aa5d9ba..574f62b 100644
> --- a/drivers/gpu/drm/radeon/radeon_vce.c
> +++ b/drivers/gpu/drm/radeon/radeon_vce.c
> @@ -38,8 +38,10 @@
>  #define VCE_IDLE_TIMEOUT_MS	1000
>  
>  /* Firmware Names */
> +#define FIRMWARE_TAHITI	"radeon/TAHITI_vce.bin"
>  #define FIRMWARE_BONAIRE	"radeon/BONAIRE_vce.bin"
>  
> +MODULE_FIRMWARE(FIRMWARE_TAHITI);
>  MODULE_FIRMWARE(FIRMWARE_BONAIRE);
>  
>  static void radeon_vce_idle_work_handler(struct work_struct *work);
> @@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev)
>  	INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
>  
>  	switch (rdev->family) {
> +	case CHIP_TAHITI:
> +	case CHIP_PITCAIRN:
> +	case CHIP_VERDE:
> +	case CHIP_OLAND:
> +	case CHIP_ARUBA:
> +		fw_name = FIRMWARE_TAHITI;
> +		break;
> +
>  	case CHIP_BONAIRE:
>  	case CHIP_KAVERI:
>  	case CHIP_KABINI:
> @@ -125,7 +135,10 @@ int radeon_vce_init(struct radeon_device *rdev)
>  
>  	/* allocate firmware, stack and heap BO */
>  
> -	size = vce_v2_0_bo_size(rdev);
> +	if (rdev->family < CHIP_BONAIRE)
> +		size = vce_v1_0_bo_size(rdev);
> +	else
> +		size = vce_v2_0_bo_size(rdev);
>  	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
>  			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
>  			     &rdev->vce.vcpu_bo);
> @@ -226,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev)
>  		return r;
>  	}
>  
> -	memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
> +	memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
> +	if (rdev->family < CHIP_BONAIRE)
> +		r = vce_v1_0_load_fw(rdev, cpu_addr);
> +	else
> +		memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
>  
>  	radeon_bo_kunmap(rdev->vce.vcpu_bo);
>  
>  	radeon_bo_unreserve(rdev->vce.vcpu_bo);
>  
> -	return 0;
> +	return r;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index 6ff78bc..5ca08e3 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev)
>  			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
>  	}
>  
> +	r = radeon_vce_resume(rdev);
> +	if (!r) {
> +		r = vce_v1_0_resume(rdev);
> +		if (!r)
> +			r = radeon_fence_driver_start_ring(rdev,
> +							   TN_RING_TYPE_VCE1_INDEX);
> +		if (!r)
> +			r = radeon_fence_driver_start_ring(rdev,
> +							   TN_RING_TYPE_VCE2_INDEX);
> +	}
> +	if (r) {
> +		dev_err(rdev->dev, "VCE init error (%d).\n", r);
> +		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
> +		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
> +	}
> +
>  	/* Enable IRQ */
>  	if (!rdev->irq.installed) {
>  		r = radeon_irq_kms_init(rdev);
> @@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev)
>  		}
>  	}
>  
> +	r = -ENOENT;
> +
> +	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
> +	if (ring->ring_size)
> +		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
> +				     VCE_CMD_NO_OP);
> +
> +	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
> +	if (ring->ring_size)
> +		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
> +				     VCE_CMD_NO_OP);
> +
> +	if (!r)
> +		r = vce_v1_0_init(rdev);
> +	else if (r != -ENOENT)
> +		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
> +
>  	r = radeon_ib_pool_init(rdev);
>  	if (r) {
>  		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
> @@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev)
>  	if (rdev->has_uvd) {
>  		uvd_v1_0_fini(rdev);
>  		radeon_uvd_suspend(rdev);
> +		radeon_vce_suspend(rdev);
>  	}
>  	si_fini_pg(rdev);
>  	si_fini_cg(rdev);
> @@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev)
>  		}
>  	}
>  
> +	r = radeon_vce_init(rdev);
> +	if (!r) {
> +		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
> +		ring->ring_obj = NULL;
> +		r600_ring_init(rdev, ring, 4096);
> +
> +		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
> +		ring->ring_obj = NULL;
> +		r600_ring_init(rdev, ring, 4096);
> +	}
> +
>  	rdev->ih.ring_obj = NULL;
>  	r600_ih_ring_init(rdev, 64 * 1024);
>  
> @@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev)
>  	if (rdev->has_uvd) {
>  		uvd_v1_0_fini(rdev);
>  		radeon_uvd_fini(rdev);
> +		radeon_vce_fini(rdev);
>  	}
>  	si_pcie_gart_fini(rdev);
>  	r600_vram_scratch_fini(rdev);
> diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
> index 1630440..4823a07 100644
> --- a/drivers/gpu/drm/radeon/sid.h
> +++ b/drivers/gpu/drm/radeon/sid.h
> @@ -1879,6 +1879,7 @@
>  #define VCE_VCPU_CACHE_SIZE1				0x20030
>  #define VCE_VCPU_CACHE_OFFSET2				0x20034
>  #define VCE_VCPU_CACHE_SIZE2				0x20038
> +#define VCE_VCPU_SCRATCH7				0x200dc
>  #define VCE_SOFT_RESET					0x20120
>  #define 	VCE_ECPU_SOFT_RESET			(1 << 0)
>  #define 	VCE_FME_SOFT_RESET			(1 << 2)
> diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
> index b44d9c8..81dd39b 100644
> --- a/drivers/gpu/drm/radeon/vce_v1_0.c
> +++ b/drivers/gpu/drm/radeon/vce_v1_0.c
> @@ -31,6 +31,23 @@
>  #include "radeon_asic.h"
>  #include "sid.h"
>  
> +#define VCE_V1_0_FW_SIZE	(256 * 1024)
> +#define VCE_V1_0_STACK_SIZE	(64 * 1024)
> +#define VCE_V1_0_DATA_SIZE	(7808 * (RADEON_MAX_VCE_HANDLES + 1))
> +
> +struct vce_v1_0_fw_signature
> +{
> +	int32_t off;
> +	uint32_t len;
> +	int32_t num;
> +	struct {
> +		uint32_t chip_id;
> +		uint32_t keyselect;
> +		uint32_t nonce[4];
> +		uint32_t sigval[4];
> +	} val[8];
> +};
> +
>  /**
>   * vce_v1_0_get_rptr - get read pointer
>   *
> @@ -82,6 +99,129 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev,
>  		WREG32(VCE_RB_WPTR2, ring->wptr);
>  }
>  
> +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
> +{
> +	struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data;
> +	uint32_t chip_id;
> +	int i;
> +
> +	switch (rdev->family) {
> +	case CHIP_TAHITI:
> +		chip_id = 0x01000014;
> +		break;
> +	case CHIP_VERDE:
> +		chip_id = 0x01000015;
> +		break;
> +	case CHIP_PITCAIRN:
> +	case CHIP_OLAND:
> +		chip_id = 0x01000016;
> +		break;
> +	case CHIP_ARUBA:
> +		chip_id = 0x01000017;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < sign->num; ++i) {
> +		if (sign->val[i].chip_id == chip_id)
> +			break;
> +	}
> +
> +	if (i == sign->num)
> +		return -EINVAL;
> +
> +	data += (256 - 64) / 4;
> +	data[0] = sign->val[i].nonce[0];
> +	data[1] = sign->val[i].nonce[1];
> +	data[2] = sign->val[i].nonce[2];
> +	data[3] = sign->val[i].nonce[3];
> +	data[4] = sign->len + 64;
> +
> +	memset(&data[5], 0, 44);
> +	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
> +
> +	data += data[4] / 4;
> +	data[0] = sign->val[i].sigval[0];
> +	data[1] = sign->val[i].sigval[1];
> +	data[2] = sign->val[i].sigval[2];
> +	data[3] = sign->val[i].sigval[3];
> +
> +	rdev->vce.keyselect = sign->val[i].keyselect;
> +
> +	return 0;
> +}
> +
> +unsigned vce_v1_0_bo_size(struct radeon_device *rdev)
> +{
> +	WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size);
> +	return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE;
> +}
> +
> +int vce_v1_0_resume(struct radeon_device *rdev)
> +{
> +	uint64_t addr = rdev->vce.gpu_addr;
> +	uint32_t size;
> +	int i;
> +
> +	WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16));
> +	WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
> +	WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
> +	WREG32(VCE_CLOCK_GATING_B, 0);
> +
> +	WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
> +
> +	WREG32(VCE_LMI_CTRL, 0x00398000);
> +	WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1);
> +	WREG32(VCE_LMI_SWAP_CNTL, 0);
> +	WREG32(VCE_LMI_SWAP_CNTL1, 0);
> +	WREG32(VCE_LMI_VM_CTRL, 0);
> +
> +	WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES);
> +
> +	addr += 256;
> +	size = VCE_V1_0_FW_SIZE;
> +	WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
> +	WREG32(VCE_VCPU_CACHE_SIZE0, size);
> +
> +	addr += size;
> +	size = VCE_V1_0_STACK_SIZE;
> +	WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
> +	WREG32(VCE_VCPU_CACHE_SIZE1, size);
> +
> +	addr += size;
> +	size = VCE_V1_0_DATA_SIZE;
> +	WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
> +	WREG32(VCE_VCPU_CACHE_SIZE2, size);
> +
> +	WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100);
> +
> +	WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect);
> +
> +	for (i = 0; i < 10; ++i) {
> +		mdelay(10);
> +		if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE)
> +			break;
> +	}
> +
> +	if (i == 10)
> +		return -ETIMEDOUT;
> +
> +	if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS))
> +		return -EINVAL;
> +
> +	for (i = 0; i < 10; ++i) {
> +		mdelay(10);
> +		if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY))
> +			break;
> +	}
> +
> +	if (i == 10)
> +		return -ETIMEDOUT;
> +
> +	return 0;
> +}
> +
>  /**
>   * vce_v1_0_start - start VCE block
>   *
Michel Dänzer Aug. 13, 2015, 6:18 a.m. UTC | #3
On 13.08.2015 15:03, Lucas Stach wrote:
> Hi Christian,
> 
> this commit is causing a boot regression with v4.2-rcX on my Richland
> APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down
> where exactly it is going wrong, but I bisected it down to this single
> commit.
> 
> I don't have the VCE firmware installed on this system, so from a quick
> look at the code I would expect it to drop out pretty early and just
> leave the VCE unconfigured, but otherwise keep things working as before.
> This is unfortunately not the case.

If the radeon driver is built into the kernel (or loaded from the
initrd?), the attempt to load the firmware might take a long time to
time out.

Please provide more information about the symptoms, e.g. any dmesg
output etc.
Lucas Stach Aug. 13, 2015, 6:36 a.m. UTC | #4
Am Donnerstag, den 13.08.2015, 15:18 +0900 schrieb Michel Dänzer:
> On 13.08.2015 15:03, Lucas Stach wrote:
> > Hi Christian,
> > 
> > this commit is causing a boot regression with v4.2-rcX on my Richland
> > APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down
> > where exactly it is going wrong, but I bisected it down to this single
> > commit.
> > 
> > I don't have the VCE firmware installed on this system, so from a quick
> > look at the code I would expect it to drop out pretty early and just
> > leave the VCE unconfigured, but otherwise keep things working as before.
> > This is unfortunately not the case.
> 
> If the radeon driver is built into the kernel (or loaded from the
> initrd?), the attempt to load the firmware might take a long time to
> time out.
> 
Gah. Thanks, I was too impatient to wait for the firmware loading to
time out. In fact this is a standard Fedora kernel config, so radeon is
a module, but it is built into the initrd.

So it's not really radeon's fault, but one more iteration of the fact
that anything involving firmware loading is just horribly inconvenient.
Especially if it's firmware for an optional component.

Regards,
Lucas
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 32f5f03..faffca3 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -2040,6 +2040,25 @@  static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
 
+	if (rdev->family == CHIP_ARUBA) {
+		r = radeon_vce_resume(rdev);
+		if (!r)
+			r = vce_v1_0_resume(rdev);
+
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE1_INDEX);
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE2_INDEX);
+
+		if (r) {
+			dev_err(rdev->dev, "VCE init error (%d).\n", r);
+			rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+			rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+		}
+	}
+
 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
 	if (r) {
 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -2117,6 +2136,19 @@  static int cayman_startup(struct radeon_device *rdev)
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 	}
 
+	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+	if (!r)
+		r = vce_v1_0_init(rdev);
+	else if (r != -ENOENT)
+		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2272,6 +2304,19 @@  int cayman_init(struct radeon_device *rdev)
 		r600_ring_init(rdev, ring, 4096);
 	}
 
+	if (rdev->family == CHIP_ARUBA) {
+		r = radeon_vce_init(rdev);
+		if (!r) {
+			ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+			ring->ring_obj = NULL;
+			r600_ring_init(rdev, ring, 4096);
+
+			ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+			ring->ring_obj = NULL;
+			r600_ring_init(rdev, ring, 4096);
+		}
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -2325,6 +2370,7 @@  void cayman_fini(struct radeon_device *rdev)
 	radeon_irq_kms_fini(rdev);
 	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
+	radeon_vce_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 38603b1..59480fd 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1719,6 +1719,7 @@  struct radeon_vce {
 	struct drm_file		*filp[RADEON_MAX_VCE_HANDLES];
 	unsigned		img_size[RADEON_MAX_VCE_HANDLES];
 	struct delayed_work	idle_work;
+	uint32_t		keyselect;
 };
 
 int radeon_vce_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index b37b22bd..eaf909e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1761,6 +1761,19 @@  static struct radeon_asic cayman_asic = {
 	},
 };
 
+static struct radeon_asic_ring trinity_vce_ring = {
+	.ib_execute = &radeon_vce_ib_execute,
+	.emit_fence = &radeon_vce_fence_emit,
+	.emit_semaphore = &radeon_vce_semaphore_emit,
+	.cs_parse = &radeon_vce_cs_parse,
+	.ring_test = &radeon_vce_ring_test,
+	.ib_test = &radeon_vce_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &vce_v1_0_get_rptr,
+	.get_wptr = &vce_v1_0_get_wptr,
+	.set_wptr = &vce_v1_0_set_wptr,
+};
+
 static struct radeon_asic trinity_asic = {
 	.init = &cayman_init,
 	.fini = &cayman_fini,
@@ -1794,6 +1807,8 @@  static struct radeon_asic trinity_asic = {
 		[R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
 		[CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
 		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+		[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+		[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1930,6 +1945,8 @@  static struct radeon_asic si_asic = {
 		[R600_RING_TYPE_DMA_INDEX] = &si_dma_ring,
 		[CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring,
 		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+		[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+		[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
 	},
 	.irq = {
 		.set = &si_irq_set,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 629f291..e0aa332 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -972,6 +972,9 @@  uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
 			   struct radeon_ring *ring);
 void vce_v1_0_set_wptr(struct radeon_device *rdev,
 		       struct radeon_ring *ring);
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data);
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev);
+int vce_v1_0_resume(struct radeon_device *rdev);
 int vce_v1_0_init(struct radeon_device *rdev);
 int vce_v1_0_start(struct radeon_device *rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index aa5d9ba..574f62b 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -38,8 +38,10 @@ 
 #define VCE_IDLE_TIMEOUT_MS	1000
 
 /* Firmware Names */
+#define FIRMWARE_TAHITI	"radeon/TAHITI_vce.bin"
 #define FIRMWARE_BONAIRE	"radeon/BONAIRE_vce.bin"
 
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
 
 static void radeon_vce_idle_work_handler(struct work_struct *work);
@@ -63,6 +65,14 @@  int radeon_vce_init(struct radeon_device *rdev)
 	INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
 
 	switch (rdev->family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	case CHIP_OLAND:
+	case CHIP_ARUBA:
+		fw_name = FIRMWARE_TAHITI;
+		break;
+
 	case CHIP_BONAIRE:
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
@@ -125,7 +135,10 @@  int radeon_vce_init(struct radeon_device *rdev)
 
 	/* allocate firmware, stack and heap BO */
 
-	size = vce_v2_0_bo_size(rdev);
+	if (rdev->family < CHIP_BONAIRE)
+		size = vce_v1_0_bo_size(rdev);
+	else
+		size = vce_v2_0_bo_size(rdev);
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
 			     &rdev->vce.vcpu_bo);
@@ -226,13 +239,17 @@  int radeon_vce_resume(struct radeon_device *rdev)
 		return r;
 	}
 
-	memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
+	memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
+	if (rdev->family < CHIP_BONAIRE)
+		r = vce_v1_0_load_fw(rdev, cpu_addr);
+	else
+		memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
 
 	radeon_bo_kunmap(rdev->vce.vcpu_bo);
 
 	radeon_bo_unreserve(rdev->vce.vcpu_bo);
 
-	return 0;
+	return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 6ff78bc..5ca08e3 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -6907,6 +6907,22 @@  static int si_startup(struct radeon_device *rdev)
 			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
 	}
 
+	r = radeon_vce_resume(rdev);
+	if (!r) {
+		r = vce_v1_0_resume(rdev);
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE1_INDEX);
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE2_INDEX);
+	}
+	if (r) {
+		dev_err(rdev->dev, "VCE init error (%d).\n", r);
+		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+	}
+
 	/* Enable IRQ */
 	if (!rdev->irq.installed) {
 		r = radeon_irq_kms_init(rdev);
@@ -6975,6 +6991,23 @@  static int si_startup(struct radeon_device *rdev)
 		}
 	}
 
+	r = -ENOENT;
+
+	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+				     VCE_CMD_NO_OP);
+
+	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+				     VCE_CMD_NO_OP);
+
+	if (!r)
+		r = vce_v1_0_init(rdev);
+	else if (r != -ENOENT)
+		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -7033,6 +7066,7 @@  int si_suspend(struct radeon_device *rdev)
 	if (rdev->has_uvd) {
 		uvd_v1_0_fini(rdev);
 		radeon_uvd_suspend(rdev);
+		radeon_vce_suspend(rdev);
 	}
 	si_fini_pg(rdev);
 	si_fini_cg(rdev);
@@ -7140,6 +7174,17 @@  int si_init(struct radeon_device *rdev)
 		}
 	}
 
+	r = radeon_vce_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+
+		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -7191,6 +7236,7 @@  void si_fini(struct radeon_device *rdev)
 	if (rdev->has_uvd) {
 		uvd_v1_0_fini(rdev);
 		radeon_uvd_fini(rdev);
+		radeon_vce_fini(rdev);
 	}
 	si_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 1630440..4823a07 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -1879,6 +1879,7 @@ 
 #define VCE_VCPU_CACHE_SIZE1				0x20030
 #define VCE_VCPU_CACHE_OFFSET2				0x20034
 #define VCE_VCPU_CACHE_SIZE2				0x20038
+#define VCE_VCPU_SCRATCH7				0x200dc
 #define VCE_SOFT_RESET					0x20120
 #define 	VCE_ECPU_SOFT_RESET			(1 << 0)
 #define 	VCE_FME_SOFT_RESET			(1 << 2)
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index b44d9c8..81dd39b 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -31,6 +31,23 @@ 
 #include "radeon_asic.h"
 #include "sid.h"
 
+#define VCE_V1_0_FW_SIZE	(256 * 1024)
+#define VCE_V1_0_STACK_SIZE	(64 * 1024)
+#define VCE_V1_0_DATA_SIZE	(7808 * (RADEON_MAX_VCE_HANDLES + 1))
+
+struct vce_v1_0_fw_signature
+{
+	int32_t off;
+	uint32_t len;
+	int32_t num;
+	struct {
+		uint32_t chip_id;
+		uint32_t keyselect;
+		uint32_t nonce[4];
+		uint32_t sigval[4];
+	} val[8];
+};
+
 /**
  * vce_v1_0_get_rptr - get read pointer
  *
@@ -82,6 +99,129 @@  void vce_v1_0_set_wptr(struct radeon_device *rdev,
 		WREG32(VCE_RB_WPTR2, ring->wptr);
 }
 
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
+{
+	struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data;
+	uint32_t chip_id;
+	int i;
+
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+		chip_id = 0x01000014;
+		break;
+	case CHIP_VERDE:
+		chip_id = 0x01000015;
+		break;
+	case CHIP_PITCAIRN:
+	case CHIP_OLAND:
+		chip_id = 0x01000016;
+		break;
+	case CHIP_ARUBA:
+		chip_id = 0x01000017;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < sign->num; ++i) {
+		if (sign->val[i].chip_id == chip_id)
+			break;
+	}
+
+	if (i == sign->num)
+		return -EINVAL;
+
+	data += (256 - 64) / 4;
+	data[0] = sign->val[i].nonce[0];
+	data[1] = sign->val[i].nonce[1];
+	data[2] = sign->val[i].nonce[2];
+	data[3] = sign->val[i].nonce[3];
+	data[4] = sign->len + 64;
+
+	memset(&data[5], 0, 44);
+	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
+
+	data += data[4] / 4;
+	data[0] = sign->val[i].sigval[0];
+	data[1] = sign->val[i].sigval[1];
+	data[2] = sign->val[i].sigval[2];
+	data[3] = sign->val[i].sigval[3];
+
+	rdev->vce.keyselect = sign->val[i].keyselect;
+
+	return 0;
+}
+
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev)
+{
+	WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size);
+	return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE;
+}
+
+int vce_v1_0_resume(struct radeon_device *rdev)
+{
+	uint64_t addr = rdev->vce.gpu_addr;
+	uint32_t size;
+	int i;
+
+	WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16));
+	WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+	WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+	WREG32(VCE_CLOCK_GATING_B, 0);
+
+	WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+	WREG32(VCE_LMI_CTRL, 0x00398000);
+	WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+	WREG32(VCE_LMI_SWAP_CNTL, 0);
+	WREG32(VCE_LMI_SWAP_CNTL1, 0);
+	WREG32(VCE_LMI_VM_CTRL, 0);
+
+	WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES);
+
+	addr += 256;
+	size = VCE_V1_0_FW_SIZE;
+	WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
+	WREG32(VCE_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = VCE_V1_0_STACK_SIZE;
+	WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
+	WREG32(VCE_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = VCE_V1_0_DATA_SIZE;
+	WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
+	WREG32(VCE_VCPU_CACHE_SIZE2, size);
+
+	WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100);
+
+	WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect);
+
+	for (i = 0; i < 10; ++i) {
+		mdelay(10);
+		if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE)
+			break;
+	}
+
+	if (i == 10)
+		return -ETIMEDOUT;
+
+	if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS))
+		return -EINVAL;
+
+	for (i = 0; i < 10; ++i) {
+		mdelay(10);
+		if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY))
+			break;
+	}
+
+	if (i == 10)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
 /**
  * vce_v1_0_start - start VCE block
  *