diff mbox series

[v2,04/15] drm/panthor: Add the device logical block

Message ID 20230809165330.2451699-5-boris.brezillon@collabora.com (mailing list archive)
State New, archived
Headers show
Series drm: Add a driver for FW-based Mali GPUs | expand

Commit Message

Boris Brezillon Aug. 9, 2023, 4:53 p.m. UTC
The panthor driver is designed in a modular way, where each logical
block is dealing with a specific HW-block or software feature. In order
for those blocks to communicate with each other, we need a central
panthor_device collecting all the blocks, and exposing some common
features, like interrupt handling, power management, reset, ...

This what this panthor_device logical block is about.

v2:
- Rename the driver (pancsf -> panthor)
- Change the license (GPL2 -> MIT + GPL2)
- Split the driver addition commit
- Add devfreq/PM support
- Use drm_dev_{unplug,enter,exit}() to provide safe device removal

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_device.c | 479 +++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_device.h | 354 +++++++++++++++++
 2 files changed, 833 insertions(+)
 create mode 100644 drivers/gpu/drm/panthor/panthor_device.c
 create mode 100644 drivers/gpu/drm/panthor/panthor_device.h

Comments

Steven Price Aug. 11, 2023, 3:47 p.m. UTC | #1
On 09/08/2023 17:53, Boris Brezillon wrote:
> The panthor driver is designed in a modular way, where each logical
> block is dealing with a specific HW-block or software feature. In order
> for those blocks to communicate with each other, we need a central
> panthor_device collecting all the blocks, and exposing some common
> features, like interrupt handling, power management, reset, ...
> 
> This what this panthor_device logical block is about.
> 
> v2:
> - Rename the driver (pancsf -> panthor)
> - Change the license (GPL2 -> MIT + GPL2)
> - Split the driver addition commit
> - Add devfreq/PM support
> - Use drm_dev_{unplug,enter,exit}() to provide safe device removal
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_device.c | 479 +++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_device.h | 354 +++++++++++++++++
>  2 files changed, 833 insertions(+)
>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.c
>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.h
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> new file mode 100644
> index 000000000000..15f102116fa0
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -0,0 +1,479 @@
> +// SPDX-License-Identifier: GPL-2.0 or MIT
> +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
> +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
> +/* Copyright 2023 Collabora ltd. */
> +
> +#include <linux/clk.h>
> +#include <linux/reset.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_domain.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/regulator/consumer.h>
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +
> +#include "panthor_sched.h"
> +#include "panthor_device.h"
> +#include "panthor_devfreq.h"
> +#include "panthor_gpu.h"
> +#include "panthor_fw.h"
> +#include "panthor_mmu.h"
> +#include "panthor_regs.h"
> +
> +static int panthor_clk_init(struct panthor_device *ptdev)
> +{
> +	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
> +	if (IS_ERR(ptdev->clks.core)) {
> +		drm_err(&ptdev->base, "get 'core' clock failed %ld\n",
> +			PTR_ERR(ptdev->clks.core));

I suspect it would be a good idea to use dev_err_probe() here (and
below) as I believe devm_clk_get can return -EPROBE_DEFER.

> +		return PTR_ERR(ptdev->clks.core);
> +	}
> +
> +	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
> +	if (IS_ERR(ptdev->clks.stacks)) {
> +		drm_err(&ptdev->base, "get 'stacks' clock failed %ld\n",
> +			PTR_ERR(ptdev->clks.stacks));
> +		return PTR_ERR(ptdev->clks.stacks);
> +	}
> +
> +	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
> +	if (IS_ERR(ptdev->clks.coregroup)) {
> +		drm_err(&ptdev->base, "get 'coregroup' clock failed %ld\n",
> +			PTR_ERR(ptdev->clks.coregroup));
> +		return PTR_ERR(ptdev->clks.coregroup);
> +	}
> +
> +	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
> +	return 0;
> +}
> +
> +void panthor_device_unplug(struct panthor_device *ptdev)
> +{
> +	/* FIXME: This is racy. */

Can we fix this? From a quick look it seems like a sequence like below
should avoid the race.

	if (!drm_dev_enter())
		/* Already unplugged */
		return;
	ptdev->base.unplugged = true;
	drm_dev_exit();

Although possibly that should be in the DRM core rather than open-coded
here.

> +	if (drm_dev_is_unplugged(&ptdev->base))
> +		return;
> +
> +	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
> +
> +	/* Call drm_dev_unplug() so any access to HW block happening after
> +	 * that point get rejected.
> +	 */
> +	drm_dev_unplug(&ptdev->base);
> +
> +	/* Now, try to cleanly shutdown the GPU before the device resources
> +	 * get reclaimed.
> +	 */
> +	panthor_sched_unplug(ptdev);
> +	panthor_fw_unplug(ptdev);
> +	panthor_mmu_unplug(ptdev);
> +	panthor_gpu_unplug(ptdev);
> +
> +	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> +}
> +
> +static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
> +{
> +	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
> +
> +	cancel_work_sync(&ptdev->reset.work);
> +	destroy_workqueue(ptdev->reset.wq);
> +}
> +
> +static void panthor_device_reset_work(struct work_struct *work)
> +{
> +	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
> +	int ret, cookie;
> +
> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> +		return;
> +
> +	panthor_sched_pre_reset(ptdev);
> +	panthor_fw_pre_reset(ptdev, true);
> +	panthor_mmu_pre_reset(ptdev);
> +	panthor_gpu_soft_reset(ptdev);
> +	panthor_gpu_l2_power_on(ptdev);
> +	panthor_mmu_post_reset(ptdev);
> +	ret = panthor_fw_post_reset(ptdev);
> +	if (ret)
> +		goto out;
> +
> +	atomic_set(&ptdev->reset.pending, 0);
> +	panthor_sched_post_reset(ptdev);
> +	drm_dev_exit(cookie);
> +
> +out:
> +	if (ret) {

This looks like a race condition too - is there a need for a
drm_dev_exit_and_unplug() function?

> +		panthor_device_unplug(ptdev);
> +		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
> +	}
> +}
> +
> +static bool panthor_device_is_initialized(struct panthor_device *ptdev)
> +{
> +	return !!ptdev->scheduler;
> +}
> +
> +static void panthor_device_free_page(struct drm_device *ddev, void *data)
> +{
> +	free_page((unsigned long)data);
> +}
> +
> +int panthor_device_init(struct panthor_device *ptdev)
> +{
> +	struct resource *res;
> +	struct page *p;
> +	int ret;
> +
> +	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
> +
> +	drmm_mutex_init(&ptdev->base, &ptdev->pm.lock);
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> +	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	ptdev->pm.dummy_latest_flush = page_address(p);
> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
> +				       ptdev->pm.dummy_latest_flush);
> +	if (ret)
> +		return ret;
> +
> +	/* Set the dummy page to the default LATEST_FLUSH value. This
> +	 * will be updated on the next suspend.
> +	 */
> +	*ptdev->pm.dummy_latest_flush = CSF_GPU_LATEST_FLUSH_ID_DEFAULT;

I see why this register default value was defined. Although I'm not sure
it has any benefit over just using zero... If the GPU is off when user
space reads the FLUSH_ID then the GPU's caches are definitely empty so
any flush ID is valid.

Interestingly looking at kbase it seems to use an initial value of 1
(POWER_DOWN_LATEST_FLUSH_VALUE). I guess zero is less ideal because
FLUSH_CACHE2 would then unconditionally do a flush.

> +
> +	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
> +	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
> +	if (!ptdev->reset.wq)
> +		return -ENOMEM;
> +
> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
> +	if (ret)
> +		return ret;
> +
> +	ret = panthor_clk_init(ptdev);
> +	if (ret)
> +		return ret;
> +
> +	ret = panthor_devfreq_init(ptdev);
> +	if (ret)
> +		return ret;
> +
> +	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
> +							      0, &res);
> +	if (IS_ERR(ptdev->iomem))
> +		return PTR_ERR(ptdev->iomem);
> +
> +	ptdev->phys_addr = res->start;
> +
> +	ret = devm_pm_runtime_enable(ptdev->base.dev);
> +	if (ret)
> +		return ret;
> +
> +	ret = pm_runtime_resume_and_get(ptdev->base.dev);
> +	if (ret)
> +		return ret;
> +
> +	ret = panthor_gpu_init(ptdev);
> +	if (ret)
> +		goto err_rpm_put;
> +
> +	ret = panthor_mmu_init(ptdev);
> +	if (ret)
> +		goto err_rpm_put;
> +
> +	ret = panthor_fw_init(ptdev);
> +	if (ret)
> +		goto err_rpm_put;
> +
> +	ret = panthor_sched_init(ptdev);
> +	if (ret)
> +		goto err_rpm_put;
> +
> +	/* ~3 frames */
> +	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
> +	pm_runtime_use_autosuspend(ptdev->base.dev);
> +	pm_runtime_put_autosuspend(ptdev->base.dev);
> +	return 0;
> +
> +err_rpm_put:
> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> +	return ret;
> +}
> +
> +#define PANTHOR_EXCEPTION(id) \
> +	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
> +		.name = #id, \
> +	}
> +
> +struct panthor_exception_info {
> +	const char *name;
> +};
> +
> +static const struct panthor_exception_info panthor_exception_infos[] = {
> +	PANTHOR_EXCEPTION(OK),
> +	PANTHOR_EXCEPTION(TERMINATED),
> +	PANTHOR_EXCEPTION(KABOOM),
> +	PANTHOR_EXCEPTION(EUREKA),
> +	PANTHOR_EXCEPTION(ACTIVE),
> +	PANTHOR_EXCEPTION(CS_RES_TERM),
> +	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
> +	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
> +	PANTHOR_EXCEPTION(CS_BUS_FAULT),
> +	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
> +	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
> +	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
> +	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
> +	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
> +	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
> +	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
> +	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
> +	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
> +	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
> +	PANTHOR_EXCEPTION(OOM),
> +	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
> +	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
> +	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
> +	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
> +	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
> +	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
> +	PANTHOR_EXCEPTION(PERM_FAULT_0),
> +	PANTHOR_EXCEPTION(PERM_FAULT_1),
> +	PANTHOR_EXCEPTION(PERM_FAULT_2),
> +	PANTHOR_EXCEPTION(PERM_FAULT_3),
> +	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
> +	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
> +	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
> +};
> +
> +const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
> +{
> +	if (drm_WARN_ON(&ptdev->base,

I'm not convinced this should be a WARN_ON as I suspect it's probably
possible to inject values from user space (although I'm not completely
sure on that). It's certainly not a driver error as such if we can't
decode the value.

> +			exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
> +			!panthor_exception_infos[exception_code].name))
> +		return "Unknown exception type";
> +
> +	return panthor_exception_infos[exception_code].name;
> +}
> +
> +static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct panthor_device *ptdev = vma->vm_private_data;
> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> +	unsigned long pfn;
> +	pgprot_t pgprot;
> +	vm_fault_t ret;
> +	bool active;
> +	int cookie;
> +
> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> +		return VM_FAULT_SIGBUS;
> +
> +	mutex_lock(&ptdev->pm.lock);
> +	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
> +
> +	switch (id) {
> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> +		if (active)
> +			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
> +		else
> +			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
> +		break;
> +
> +	default:
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_unlock;
> +	}
> +
> +	pgprot = vma->vm_page_prot;
> +	if (active)
> +		pgprot = pgprot_noncached(pgprot);
> +
> +	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
> +
> +out_unlock:
> +	mutex_unlock(&ptdev->pm.lock);
> +	drm_dev_exit(cookie);
> +	return ret;
> +}
> +
> +static const struct vm_operations_struct panthor_mmio_vm_ops = {
> +	.fault = panthor_mmio_vm_fault,
> +};
> +
> +int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
> +{
> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> +
> +	switch (id) {
> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> +		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
> +		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
> +			return -EINVAL;
> +
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	/* Defer actual mapping to the fault handler. */
> +	vma->vm_private_data = ptdev;
> +	vma->vm_ops = &panthor_mmio_vm_ops;
> +	vm_flags_set(vma,
> +		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
> +		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
> +	return 0;
> +}
> +
> +#ifdef CONFIG_PM
> +int panthor_device_resume(struct device *dev)
> +{
> +	struct panthor_device *ptdev = dev_get_drvdata(dev);
> +	int ret, cookie;
> +
> +	mutex_lock(&ptdev->pm.lock);
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
> +
> +	ret = clk_prepare_enable(ptdev->clks.core);
> +	if (ret)
> +		goto err_unlock;
> +
> +	ret = clk_prepare_enable(ptdev->clks.stacks);
> +	if (ret)
> +		goto err_disable_core_clk;
> +
> +	ret = clk_prepare_enable(ptdev->clks.coregroup);
> +	if (ret)
> +		goto err_disable_stacks_clk;
> +
> +	ret = panthor_devfreq_resume(ptdev);
> +	if (ret)
> +		goto err_disable_coregroup_clk;
> +
> +	if (panthor_device_is_initialized(ptdev) &&
> +	    drm_dev_enter(&ptdev->base, &cookie)) {
> +		panthor_gpu_resume(ptdev);
> +		panthor_mmu_resume(ptdev);
> +		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
> +		if (!ret)
> +			panthor_sched_resume(ptdev);
> +
> +		drm_dev_exit(cookie);
> +
> +		if (ret)
> +			goto err_devfreq_suspend;
> +	}
> +
> +	/* Clear all IOMEM mappings pointing to this device after we've
> +	 * resumed. This way the fake mappings pointing to the dummy pages
> +	 * are removed and the real iomem mapping will be restored on next
> +	 * access.
> +	 */
> +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
> +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);

Is the ordering here correct? I think we need to set ACTIVE before the
unmap_mapping_range otherwise there is a (very small) race where user
space could fault the page (and get the dummy mapping) before the
atomic_set.

Hmm, actually we have the pm.lock, so no this isn't racy. In which case
is there a good reason that you're using atomics? I can see two accesses
which aren't protected by pm.lock:

  * the early out in panthor_device_suspend() - which could easily be
moved inside the lock.

  * panthor_device_schedule_reset() - this looks racy (the power down
could happen immediately after the atomic_read()), so I suspect it would
be better moving the check into panthor_device_reset_work() and
performing it with the pm.lock held.

> +	if (atomic_read(&ptdev->reset.pending))
> +		queue_work(ptdev->reset.wq, &ptdev->reset.work);
> +
> +	mutex_unlock(&ptdev->pm.lock);
> +	return 0;
> +
> +err_devfreq_suspend:
> +	panthor_devfreq_suspend(ptdev);
> +
> +err_disable_coregroup_clk:
> +	clk_disable_unprepare(ptdev->clks.coregroup);
> +
> +err_disable_stacks_clk:
> +	clk_disable_unprepare(ptdev->clks.stacks);
> +
> +err_disable_core_clk:
> +	clk_disable_unprepare(ptdev->clks.core);
> +
> +err_unlock:
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> +	mutex_unlock(&ptdev->pm.lock);
> +	return ret;
> +}
> +
> +int panthor_device_suspend(struct device *dev)
> +{
> +	struct panthor_device *ptdev = dev_get_drvdata(dev);
> +	int ret, cookie;
> +
> +	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
> +		return 0;
> +
> +	mutex_lock(&ptdev->pm.lock);
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING);
> +
> +	/* Clear all IOMEM mappings pointing to this device before we
> +	 * shutdown the power-domain and clocks. Failing to do that results
> +	 * in external aborts when the process accesses the iomem region.
> +	 */
> +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
> +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> +
> +	if (panthor_device_is_initialized(ptdev) &&
> +	    drm_dev_enter(&ptdev->base, &cookie)) {
> +		cancel_work_sync(&ptdev->reset.work);
> +
> +		/* We prepare everything as if we were resetting the GPU.
> +		 * The end of the reset will happen in the resume path though.
> +		 */
> +		panthor_sched_suspend(ptdev);
> +		panthor_fw_suspend(ptdev);
> +		panthor_mmu_suspend(ptdev);
> +		panthor_gpu_suspend(ptdev);
> +		drm_dev_exit(cookie);
> +	}
> +
> +	ret = panthor_devfreq_suspend(ptdev);
> +	if (ret) {
> +		if (panthor_device_is_initialized(ptdev) &&
> +		    drm_dev_enter(&ptdev->base, &cookie)) {
> +			panthor_gpu_resume(ptdev);
> +			panthor_mmu_resume(ptdev);
> +			drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
> +			panthor_sched_resume(ptdev);
> +			drm_dev_exit(cookie);
> +		}
> +
> +		atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> +		goto out_unlock;
> +	}
> +
> +	/* Before we suspend, update the dummy_latest_flush page, so accesses
> +	 * to this dummy page return the value the HW would have returned.
> +	 */
> +	*ptdev->pm.dummy_latest_flush = gpu_read(ptdev, CSF_GPU_LATEST_FLUSH_ID);

As above, I don't believe it is important for user space to know the
value the HW would have returned during a suspend. Indeed if the
hardware was successfully suspended the flush ID is likely to be reset -
so this would be inaccurate. However any value should be safe if the
work was prepared while the GPU was off as the caches will be empty.

> +
> +	clk_disable_unprepare(ptdev->clks.coregroup);
> +	clk_disable_unprepare(ptdev->clks.stacks);
> +	clk_disable_unprepare(ptdev->clks.core);
> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> +
> +out_unlock:
> +	mutex_unlock(&ptdev->pm.lock);
> +	return ret;
> +}
> +#endif
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> new file mode 100644
> index 000000000000..e0e1be263eb9
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -0,0 +1,354 @@
> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
> +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
> +/* Copyright 2023 Collabora ltd. */
> +
> +#ifndef __PANTHOR_DEVICE_H__
> +#define __PANTHOR_DEVICE_H__
> +
> +#include <linux/atomic.h>
> +#include <linux/io-pgtable.h>
> +#include <linux/regulator/consumer.h>
> +#include <linux/spinlock.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_mm.h>
> +#include <drm/gpu_scheduler.h>
> +#include <drm/panthor_drm.h>
> +
> +struct panthor_csf;
> +struct panthor_csf_ctx;
> +struct panthor_device;
> +struct panthor_gpu;
> +struct panthor_group_pool;
> +struct panthor_heap_pool;
> +struct panthor_job;
> +struct panthor_mmu;
> +struct panthor_fw;
> +struct panthor_perfcnt;
> +struct panthor_vm;
> +struct panthor_vm_pool;
> +
> +/**
> + * enum panthor_device_pm_state - PM state
> + */
> +enum panthor_device_pm_state {
> +	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
> +	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,
> +
> +	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
> +	PANTHOR_DEVICE_PM_STATE_RESUMING,
> +
> +	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
> +	PANTHOR_DEVICE_PM_STATE_ACTIVE,
> +
> +	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
> +	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
> +};
> +
> +/**
> + * struct panthor_irq - IRQ data
> + *
> + * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
> + */
> +struct panthor_irq {
> +	/** @ptdev: Panthor device */
> +	struct panthor_device *ptdev;
> +
> +	/** @irq: IRQ number. */
> +	int irq;
> +
> +	/** @mask: Current mask being applied to xxx_INT_MASK. */
> +	u32 mask;
> +
> +	/** @suspended: Set to true when the IRQ is suspended. */
> +	atomic_t suspended;
> +};
> +
> +/**
> + * struct panthor_device - Panthor device
> + */
> +struct panthor_device {
> +	/** @base: Base drm_device. */
> +	struct drm_device base;
> +
> +	/** @phys_addr: Physical address of the iomem region. */
> +	phys_addr_t phys_addr;
> +
> +	/** @iomem: CPU mapping of the IOMEM region. */
> +	void __iomem *iomem;
> +
> +	/** @clks: GPU clocks. */
> +	struct {
> +		/** @core: Core clock. */
> +		struct clk *core;
> +
> +		/** @stacks: Stacks clock. This clock is optional. */
> +		struct clk *stacks;
> +
> +		/** @coregroup: Core group clock. This clock is optional. */
> +		struct clk *coregroup;
> +	} clks;
> +
> +	/** @coherent: True if the CPU/GPU are memory coherent. */
> +	bool coherent;
> +
> +	/** @gpu_info: GPU information. */
> +	struct drm_panthor_gpu_info gpu_info;
> +
> +	/** @csif_info: Command stream interface information. */
> +	struct drm_panthor_csif_info csif_info;
> +
> +	/** @gpu: GPU management data. */
> +	struct panthor_gpu *gpu;
> +
> +	/** @fw: FW management data. */
> +	struct panthor_fw *fw;
> +
> +	/** @mmu: MMU management data. */
> +	struct panthor_mmu *mmu;
> +
> +	/** @scheduler: Scheduler management data. */
> +	struct panthor_scheduler *scheduler;
> +
> +	/** @devfreq: Device frequency scaling management data. */
> +	struct panthor_devfreq *devfreq;
> +
> +	/** @reset: Reset related fields. */
> +	struct {
> +		/** @wq: Ordered worqueud used to schedule reset operations. */
> +		struct workqueue_struct *wq;
> +
> +		/** @work: Reset work. */
> +		struct work_struct work;
> +
> +		/** @pending: Set to true if a reset is pending. */
> +		atomic_t pending;
> +	} reset;
> +
> +	/** @pm: Power management related data. */
> +	struct {
> +		/** @state: Power state, see panthor_device_pm_state. */
> +		atomic_t state;
> +
> +		/**
> +		 * @lock: Lock protecting the suspend/resume operations.
> +		 *
> +		 * This is needed to ensure we map the dummy IO pages when
> +		 * the device is being suspended, and the real IO pages when
> +		 * the device is being resumed. We can't just do with the
> +		 * state atomicity to deal with this race.
> +		 */
> +		struct mutex lock;
> +
> +		/**
> +		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
> +		 *
> +		 * Used to replace the real LATEST_FLUSH page when the GPU
> +		 * is suspended.
> +		 */
> +		u32 *dummy_latest_flush;
> +	} pm;
> +};
> +
> +/**
> + * struct panthor_file - Panthor file
> + */
> +struct panthor_file {
> +	/** @ptdev: Device attached to this file. */
> +	struct panthor_device *ptdev;
> +
> +	/** @vms: VM pool attached to this file. */
> +	struct panthor_vm_pool *vms;
> +
> +	/** @groups: Scheduling group pool attached to this file. */
> +	struct panthor_group_pool *groups;
> +};
> +
> +int panthor_device_init(struct panthor_device *ptdev);
> +void panthor_device_unplug(struct panthor_device *ptdev);
> +
> +/**
> + * panthor_device_schedule_reset() - Schedules a reset operation
> + */
> +static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
> +{
> +	if (atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE &&

As above - this is a racy check. Although it might be safe because of
the cancel_work_sync() call in panthor_device_suspend(). But if we get
rid of this check we don't need the atomic variable.

> +	    !atomic_cmpxchg(&ptdev->reset.pending, 0, 1))
> +		queue_work(ptdev->reset.wq, &ptdev->reset.work);
> +}
> +
> +/**
> + * panthor_device_reset_is_pending() - Checks if a reset is pending.
> + *
> + * Return: true if a reset is pending, false otherwise.
> + */
> +static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
> +{
> +	return atomic_read(&ptdev->reset.pending) != 0;
> +}
> +
> +int panthor_device_mmap_io(struct panthor_device *ptdev,
> +			   struct vm_area_struct *vma);
> +
> +int panthor_device_resume(struct device *dev);
> +int panthor_device_suspend(struct device *dev);
> +
> +enum drm_panthor_exception_type {
> +	DRM_PANTHOR_EXCEPTION_OK = 0x00,
> +	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
> +	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
> +	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
> +	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
> +	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
> +	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
> +	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
> +	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
> +	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
> +	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
> +	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
> +	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
> +	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
> +	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
> +	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
> +	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
> +	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
> +	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
> +	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
> +	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
> +	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
> +	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
> +	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
> +	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
> +	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
> +	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
> +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
> +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
> +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
> +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
> +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
> +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
> +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
> +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
> +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
> +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
> +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
> +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
> +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
> +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
> +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
> +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
> +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
> +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
> +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
> +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
> +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
> +};
> +
> +/**
> + * panthor_exception_is_fault() - Checks if an exception is a fault.
> + *
> + * Return: true if the exception is a fault, false otherwise.
> + */
> +static inline bool
> +panthor_exception_is_fault(u32 exception_code)
> +{
> +	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
> +}
> +
> +const char *panthor_exception_name(struct panthor_device *ptdev,
> +				   u32 exception_code);
> +
> +/**
> + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
> + * registration function.
> + *
> + * The boiler-plate to gracefully deal with shared interrupts is
> + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
> + * just after you actual handler. The handler prototype is:
s/you/your/ or probably s/you/the/ since we don't expect people to be
adding more ;)

> + *
> + * void (*handler)(struct panthor_device *, u32 status);
> + */
> +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
> +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
> +{												\
> +	struct panthor_irq *pirq = data;							\
> +	struct panthor_device *ptdev = pirq->ptdev;						\

Maybe I'm missing something, but I was expecting a check here for if the
irq has been suspended and to avoid the register reads if it was.
Otherwise I'm not entirely sure I follow what all this code is for.

Steve

> +												\
> +	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
> +		return IRQ_NONE;								\
> +												\
> +	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
> +	return IRQ_WAKE_THREAD;									\
> +}												\
> +												\
> +static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
> +{												\
> +	struct panthor_irq *pirq = data;							\
> +	struct panthor_device *ptdev = pirq->ptdev;						\
> +	irqreturn_t ret = IRQ_NONE;								\
> +												\
> +	while (true) {										\
> +		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
> +												\
> +		if (!status)									\
> +			break;									\
> +												\
> +		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> +												\
> +		__handler(ptdev, status);							\
> +		ret = IRQ_HANDLED;								\
> +	}											\
> +												\
> +	if (!atomic_read(&pirq->suspended))							\
> +		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
> +												\
> +	return ret;										\
> +}												\
> +												\
> +static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
> +{												\
> +	int cookie;										\
> +												\
> +	atomic_set(&pirq->suspended, true);							\
> +												\
> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
> +		synchronize_irq(pirq->irq);							\
> +		drm_dev_exit(cookie);								\
> +	}											\
> +												\
> +	pirq->mask = 0;										\
> +}												\
> +												\
> +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
> +{												\
> +	int cookie;										\
> +												\
> +	atomic_set(&pirq->suspended, false);							\
> +	pirq->mask = mask;									\
> +												\
> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
> +		drm_dev_exit(cookie);								\
> +	}											\
> +}												\
> +												\
> +static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
> +					      struct panthor_irq *pirq,				\
> +					      int irq, u32 mask)				\
> +{												\
> +	pirq->ptdev = ptdev;									\
> +	pirq->irq = irq;									\
> +	panthor_ ## __name ## _irq_resume(pirq, mask);						\
> +												\
> +	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
> +					 panthor_ ## __name ## _irq_raw_handler,		\
> +					 panthor_ ## __name ## _irq_threaded_handler,		\
> +					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
> +					 pirq);							\
> +}
> +
> +extern struct workqueue_struct *panthor_cleanup_wq;
> +
> +#endif
Boris Brezillon Aug. 29, 2023, 2 p.m. UTC | #2
On Fri, 11 Aug 2023 16:47:56 +0100
Steven Price <steven.price@arm.com> wrote:

> On 09/08/2023 17:53, Boris Brezillon wrote:
> > The panthor driver is designed in a modular way, where each logical
> > block is dealing with a specific HW-block or software feature. In order
> > for those blocks to communicate with each other, we need a central
> > panthor_device collecting all the blocks, and exposing some common
> > features, like interrupt handling, power management, reset, ...
> > 
> > This what this panthor_device logical block is about.
> > 
> > v2:
> > - Rename the driver (pancsf -> panthor)
> > - Change the license (GPL2 -> MIT + GPL2)
> > - Split the driver addition commit
> > - Add devfreq/PM support
> > - Use drm_dev_{unplug,enter,exit}() to provide safe device removal
> > 
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> >  drivers/gpu/drm/panthor/panthor_device.c | 479 +++++++++++++++++++++++
> >  drivers/gpu/drm/panthor/panthor_device.h | 354 +++++++++++++++++
> >  2 files changed, 833 insertions(+)
> >  create mode 100644 drivers/gpu/drm/panthor/panthor_device.c
> >  create mode 100644 drivers/gpu/drm/panthor/panthor_device.h
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> > new file mode 100644
> > index 000000000000..15f102116fa0
> > --- /dev/null
> > +++ b/drivers/gpu/drm/panthor/panthor_device.c
> > @@ -0,0 +1,479 @@
> > +// SPDX-License-Identifier: GPL-2.0 or MIT
> > +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
> > +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
> > +/* Copyright 2023 Collabora ltd. */
> > +
> > +#include <linux/clk.h>
> > +#include <linux/reset.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/pm_domain.h>
> > +#include <linux/pm_runtime.h>
> > +#include <linux/regulator/consumer.h>
> > +
> > +#include <drm/drm_drv.h>
> > +#include <drm/drm_managed.h>
> > +
> > +#include "panthor_sched.h"
> > +#include "panthor_device.h"
> > +#include "panthor_devfreq.h"
> > +#include "panthor_gpu.h"
> > +#include "panthor_fw.h"
> > +#include "panthor_mmu.h"
> > +#include "panthor_regs.h"
> > +
> > +static int panthor_clk_init(struct panthor_device *ptdev)
> > +{
> > +	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
> > +	if (IS_ERR(ptdev->clks.core)) {
> > +		drm_err(&ptdev->base, "get 'core' clock failed %ld\n",
> > +			PTR_ERR(ptdev->clks.core));  
> 
> I suspect it would be a good idea to use dev_err_probe() here (and
> below) as I believe devm_clk_get can return -EPROBE_DEFER.

Nice, didn't know there was a logging function that was silencing
probe-defer errors.

> 
> > +		return PTR_ERR(ptdev->clks.core);
> > +	}
> > +
> > +	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
> > +	if (IS_ERR(ptdev->clks.stacks)) {
> > +		drm_err(&ptdev->base, "get 'stacks' clock failed %ld\n",
> > +			PTR_ERR(ptdev->clks.stacks));
> > +		return PTR_ERR(ptdev->clks.stacks);
> > +	}
> > +
> > +	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
> > +	if (IS_ERR(ptdev->clks.coregroup)) {
> > +		drm_err(&ptdev->base, "get 'coregroup' clock failed %ld\n",
> > +			PTR_ERR(ptdev->clks.coregroup));
> > +		return PTR_ERR(ptdev->clks.coregroup);
> > +	}
> > +
> > +	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
> > +	return 0;
> > +}
> > +
> > +void panthor_device_unplug(struct panthor_device *ptdev)
> > +{
> > +	/* FIXME: This is racy. */  
> 
> Can we fix this? From a quick look it seems like a sequence like below
> should avoid the race.
> 
> 	if (!drm_dev_enter())
> 		/* Already unplugged */
> 		return;
> 	ptdev->base.unplugged = true;
> 	drm_dev_exit();
> 
> Although possibly that should be in the DRM core rather than open-coded
> here.

Are you sure that's protecting us against two concurrent calls to
drm_dev_unplug() (drm_dev_enter() is taking a read-lock)? And that's not
the only thing I need actually. If there are 2 threads entering
panthor_device_unplug(), I need to make sure the one who losts (arrived
after unplugged was set to false) is waiting for all operations after
the drm_dev_unplug() call to be done, otherwise we might return from
platform_driver->remove() before the unplug cleanups are done, and
there might still be threads/workqueues accessing device resources
while/after they get released by the device-model.

> 
> > +	if (drm_dev_is_unplugged(&ptdev->base))
> > +		return;
> > +
> > +	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
> > +
> > +	/* Call drm_dev_unplug() so any access to HW block happening after
> > +	 * that point get rejected.
> > +	 */
> > +	drm_dev_unplug(&ptdev->base);
> > +
> > +	/* Now, try to cleanly shutdown the GPU before the device resources
> > +	 * get reclaimed.
> > +	 */
> > +	panthor_sched_unplug(ptdev);
> > +	panthor_fw_unplug(ptdev);
> > +	panthor_mmu_unplug(ptdev);
> > +	panthor_gpu_unplug(ptdev);
> > +
> > +	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
> > +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> > +}
> > +
> > +static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
> > +{
> > +	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
> > +
> > +	cancel_work_sync(&ptdev->reset.work);
> > +	destroy_workqueue(ptdev->reset.wq);
> > +}
> > +
> > +static void panthor_device_reset_work(struct work_struct *work)
> > +{
> > +	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
> > +	int ret, cookie;
> > +
> > +	if (!drm_dev_enter(&ptdev->base, &cookie))
> > +		return;
> > +
> > +	panthor_sched_pre_reset(ptdev);
> > +	panthor_fw_pre_reset(ptdev, true);
> > +	panthor_mmu_pre_reset(ptdev);
> > +	panthor_gpu_soft_reset(ptdev);
> > +	panthor_gpu_l2_power_on(ptdev);
> > +	panthor_mmu_post_reset(ptdev);
> > +	ret = panthor_fw_post_reset(ptdev);
> > +	if (ret)
> > +		goto out;
> > +
> > +	atomic_set(&ptdev->reset.pending, 0);
> > +	panthor_sched_post_reset(ptdev);
> > +	drm_dev_exit(cookie);
> > +
> > +out:
> > +	if (ret) {  
> 
> This looks like a race condition too - is there a need for a
> drm_dev_exit_and_unplug() function?

drm_dev_exit() is just releasing the read-lock. drm_dev_unplug()
waits for all readers to be done and sets the unplugged value to true.
So we only get readers/writer synchronization here, but nothing doing
writer/writer sync. I guess the drm core leaves that to drivers, given
drm_dev_unplug() is usually called from xxx_driver->remove() hook, on
which serialization is guaranteed by the device-model.

TLDR; yes, it's racy, but I don't think drm_dev_exit_and_unplug() would
help solve the existing race.

It's worth noting that we currently have only 2 paths calling
panthor_device_unplug(): the platform_driver->remove() hook and the
reset worker. Calling drm_dev_unplug() might not be the right thing to
do, I just thought it was a good match to reflect the fact the device
becomes inaccessible, without adding yet another kind of device-lost
field.

> 
> > +		panthor_device_unplug(ptdev);
> > +		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
> > +	}
> > +}
> > +
> > +static bool panthor_device_is_initialized(struct panthor_device *ptdev)
> > +{
> > +	return !!ptdev->scheduler;
> > +}
> > +
> > +static void panthor_device_free_page(struct drm_device *ddev, void *data)
> > +{
> > +	free_page((unsigned long)data);
> > +}
> > +
> > +int panthor_device_init(struct panthor_device *ptdev)
> > +{
> > +	struct resource *res;
> > +	struct page *p;
> > +	int ret;
> > +
> > +	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
> > +
> > +	drmm_mutex_init(&ptdev->base, &ptdev->pm.lock);
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> > +	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (!p)
> > +		return -ENOMEM;
> > +
> > +	ptdev->pm.dummy_latest_flush = page_address(p);
> > +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
> > +				       ptdev->pm.dummy_latest_flush);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* Set the dummy page to the default LATEST_FLUSH value. This
> > +	 * will be updated on the next suspend.
> > +	 */
> > +	*ptdev->pm.dummy_latest_flush = CSF_GPU_LATEST_FLUSH_ID_DEFAULT;  
> 
> I see why this register default value was defined. Although I'm not sure
> it has any benefit over just using zero... If the GPU is off when user
> space reads the FLUSH_ID then the GPU's caches are definitely empty so
> any flush ID is valid.

Zero means we'll force a cache flush for all CS that were created while
the device was suspended, that's not ideal.

> 
> Interestingly looking at kbase it seems to use an initial value of 1
> (POWER_DOWN_LATEST_FLUSH_VALUE). I guess zero is less ideal because
> FLUSH_CACHE2 would then unconditionally do a flush.

I guess a value of 1 would work. It just means we'll get a spurious
flush if the CS is submitted after 32 flushes happened, on the other
hand we also a spurious flush on the first submitted CS when we use
POWER_DOWN_LATEST_FLUSH_VALUE. I'll switch to 1, drop the default def,
and update the comment accordingly.

> 
> > +
> > +	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
> > +	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
> > +	if (!ptdev->reset.wq)
> > +		return -ENOMEM;
> > +
> > +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = panthor_clk_init(ptdev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = panthor_devfreq_init(ptdev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
> > +							      0, &res);
> > +	if (IS_ERR(ptdev->iomem))
> > +		return PTR_ERR(ptdev->iomem);
> > +
> > +	ptdev->phys_addr = res->start;
> > +
> > +	ret = devm_pm_runtime_enable(ptdev->base.dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = pm_runtime_resume_and_get(ptdev->base.dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = panthor_gpu_init(ptdev);
> > +	if (ret)
> > +		goto err_rpm_put;
> > +
> > +	ret = panthor_mmu_init(ptdev);
> > +	if (ret)
> > +		goto err_rpm_put;
> > +
> > +	ret = panthor_fw_init(ptdev);
> > +	if (ret)
> > +		goto err_rpm_put;
> > +
> > +	ret = panthor_sched_init(ptdev);
> > +	if (ret)
> > +		goto err_rpm_put;
> > +
> > +	/* ~3 frames */
> > +	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
> > +	pm_runtime_use_autosuspend(ptdev->base.dev);
> > +	pm_runtime_put_autosuspend(ptdev->base.dev);
> > +	return 0;
> > +
> > +err_rpm_put:
> > +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> > +	return ret;
> > +}
> > +
> > +#define PANTHOR_EXCEPTION(id) \
> > +	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
> > +		.name = #id, \
> > +	}
> > +
> > +struct panthor_exception_info {
> > +	const char *name;
> > +};
> > +
> > +static const struct panthor_exception_info panthor_exception_infos[] = {
> > +	PANTHOR_EXCEPTION(OK),
> > +	PANTHOR_EXCEPTION(TERMINATED),
> > +	PANTHOR_EXCEPTION(KABOOM),
> > +	PANTHOR_EXCEPTION(EUREKA),
> > +	PANTHOR_EXCEPTION(ACTIVE),
> > +	PANTHOR_EXCEPTION(CS_RES_TERM),
> > +	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
> > +	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
> > +	PANTHOR_EXCEPTION(CS_BUS_FAULT),
> > +	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
> > +	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
> > +	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
> > +	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
> > +	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
> > +	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
> > +	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
> > +	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
> > +	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
> > +	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
> > +	PANTHOR_EXCEPTION(OOM),
> > +	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
> > +	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
> > +	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
> > +	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
> > +	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
> > +	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
> > +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
> > +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
> > +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
> > +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
> > +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
> > +	PANTHOR_EXCEPTION(PERM_FAULT_0),
> > +	PANTHOR_EXCEPTION(PERM_FAULT_1),
> > +	PANTHOR_EXCEPTION(PERM_FAULT_2),
> > +	PANTHOR_EXCEPTION(PERM_FAULT_3),
> > +	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
> > +	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
> > +	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
> > +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
> > +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
> > +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
> > +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
> > +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
> > +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
> > +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
> > +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
> > +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
> > +};
> > +
> > +const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
> > +{
> > +	if (drm_WARN_ON(&ptdev->base,  
> 
> I'm not convinced this should be a WARN_ON as I suspect it's probably
> possible to inject values from user space (although I'm not completely
> sure on that).

Normally no (it's something returned by the FW), unless userspace gets
access to the kernel <-> FW interface, which would be worrisome :-).

> It's certainly not a driver error as such if we can't
> decode the value.

Ack on dropping the WARN_ON().

> 
> > +			exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
> > +			!panthor_exception_infos[exception_code].name))
> > +		return "Unknown exception type";
> > +
> > +	return panthor_exception_infos[exception_code].name;
> > +}
> > +
> > +static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
> > +{
> > +	struct vm_area_struct *vma = vmf->vma;
> > +	struct panthor_device *ptdev = vma->vm_private_data;
> > +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> > +	unsigned long pfn;
> > +	pgprot_t pgprot;
> > +	vm_fault_t ret;
> > +	bool active;
> > +	int cookie;
> > +
> > +	if (!drm_dev_enter(&ptdev->base, &cookie))
> > +		return VM_FAULT_SIGBUS;
> > +
> > +	mutex_lock(&ptdev->pm.lock);
> > +	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
> > +
> > +	switch (id) {
> > +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> > +		if (active)
> > +			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
> > +		else
> > +			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
> > +		break;
> > +
> > +	default:
> > +		ret = VM_FAULT_SIGBUS;
> > +		goto out_unlock;
> > +	}
> > +
> > +	pgprot = vma->vm_page_prot;
> > +	if (active)
> > +		pgprot = pgprot_noncached(pgprot);
> > +
> > +	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
> > +
> > +out_unlock:
> > +	mutex_unlock(&ptdev->pm.lock);
> > +	drm_dev_exit(cookie);
> > +	return ret;
> > +}
> > +
> > +static const struct vm_operations_struct panthor_mmio_vm_ops = {
> > +	.fault = panthor_mmio_vm_fault,
> > +};
> > +
> > +int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
> > +{
> > +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> > +
> > +	switch (id) {
> > +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> > +		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
> > +		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
> > +			return -EINVAL;
> > +
> > +		break;
> > +
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Defer actual mapping to the fault handler. */
> > +	vma->vm_private_data = ptdev;
> > +	vma->vm_ops = &panthor_mmio_vm_ops;
> > +	vm_flags_set(vma,
> > +		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
> > +		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
> > +	return 0;
> > +}
> > +
> > +#ifdef CONFIG_PM
> > +int panthor_device_resume(struct device *dev)
> > +{
> > +	struct panthor_device *ptdev = dev_get_drvdata(dev);
> > +	int ret, cookie;
> > +
> > +	mutex_lock(&ptdev->pm.lock);
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
> > +
> > +	ret = clk_prepare_enable(ptdev->clks.core);
> > +	if (ret)
> > +		goto err_unlock;
> > +
> > +	ret = clk_prepare_enable(ptdev->clks.stacks);
> > +	if (ret)
> > +		goto err_disable_core_clk;
> > +
> > +	ret = clk_prepare_enable(ptdev->clks.coregroup);
> > +	if (ret)
> > +		goto err_disable_stacks_clk;
> > +
> > +	ret = panthor_devfreq_resume(ptdev);
> > +	if (ret)
> > +		goto err_disable_coregroup_clk;
> > +
> > +	if (panthor_device_is_initialized(ptdev) &&
> > +	    drm_dev_enter(&ptdev->base, &cookie)) {
> > +		panthor_gpu_resume(ptdev);
> > +		panthor_mmu_resume(ptdev);
> > +		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
> > +		if (!ret)
> > +			panthor_sched_resume(ptdev);
> > +
> > +		drm_dev_exit(cookie);
> > +
> > +		if (ret)
> > +			goto err_devfreq_suspend;
> > +	}
> > +
> > +	/* Clear all IOMEM mappings pointing to this device after we've
> > +	 * resumed. This way the fake mappings pointing to the dummy pages
> > +	 * are removed and the real iomem mapping will be restored on next
> > +	 * access.
> > +	 */
> > +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
> > +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);  
> 
> Is the ordering here correct? I think we need to set ACTIVE before the
> unmap_mapping_range otherwise there is a (very small) race where user
> space could fault the page (and get the dummy mapping) before the
> atomic_set.

We take the pm.lock in panthor_mmio_vm_fault().

> 
> Hmm, actually we have the pm.lock, so no this isn't racy. In which case
> is there a good reason that you're using atomics? I can see two accesses
> which aren't protected by pm.lock:
> 
>   * the early out in panthor_device_suspend() - which could easily be
> moved inside the lock.

When we're in suspend() we are the one in control of the pm.state, so
no race expected here.

> 
>   * panthor_device_schedule_reset() - this looks racy (the power down
> could happen immediately after the atomic_read()), so I suspect it would
> be better moving the check into panthor_device_reset_work() and
> performing it with the pm.lock held.

I think the main reason for it being an atomic is because I didn't
have PM locking in the initial implementation, but I ended adding
locking at some point because I didn't really have choice. I thought
the race didn't exist because of the workqueue synchronization/work
cancellation that happens in panthor_sched_suspend(), but I see now
that it's not protecting us (thread queuing the job could be paused
just after checking the PM state and resumed after the suspend
happened). This being said, we might have a lock ordering issue if we
take the PM lock in that path (I need to check that).

> 
> > +	if (atomic_read(&ptdev->reset.pending))
> > +		queue_work(ptdev->reset.wq, &ptdev->reset.work);
> > +
> > +	mutex_unlock(&ptdev->pm.lock);
> > +	return 0;
> > +
> > +err_devfreq_suspend:
> > +	panthor_devfreq_suspend(ptdev);
> > +
> > +err_disable_coregroup_clk:
> > +	clk_disable_unprepare(ptdev->clks.coregroup);
> > +
> > +err_disable_stacks_clk:
> > +	clk_disable_unprepare(ptdev->clks.stacks);
> > +
> > +err_disable_core_clk:
> > +	clk_disable_unprepare(ptdev->clks.core);
> > +
> > +err_unlock:
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> > +	mutex_unlock(&ptdev->pm.lock);
> > +	return ret;
> > +}
> > +
> > +int panthor_device_suspend(struct device *dev)
> > +{
> > +	struct panthor_device *ptdev = dev_get_drvdata(dev);
> > +	int ret, cookie;
> > +
> > +	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
> > +		return 0;
> > +
> > +	mutex_lock(&ptdev->pm.lock);
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING);
> > +
> > +	/* Clear all IOMEM mappings pointing to this device before we
> > +	 * shutdown the power-domain and clocks. Failing to do that results
> > +	 * in external aborts when the process accesses the iomem region.
> > +	 */
> > +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
> > +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> > +
> > +	if (panthor_device_is_initialized(ptdev) &&
> > +	    drm_dev_enter(&ptdev->base, &cookie)) {
> > +		cancel_work_sync(&ptdev->reset.work);
> > +
> > +		/* We prepare everything as if we were resetting the GPU.
> > +		 * The end of the reset will happen in the resume path though.
> > +		 */
> > +		panthor_sched_suspend(ptdev);
> > +		panthor_fw_suspend(ptdev);
> > +		panthor_mmu_suspend(ptdev);
> > +		panthor_gpu_suspend(ptdev);
> > +		drm_dev_exit(cookie);
> > +	}
> > +
> > +	ret = panthor_devfreq_suspend(ptdev);
> > +	if (ret) {
> > +		if (panthor_device_is_initialized(ptdev) &&
> > +		    drm_dev_enter(&ptdev->base, &cookie)) {
> > +			panthor_gpu_resume(ptdev);
> > +			panthor_mmu_resume(ptdev);
> > +			drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
> > +			panthor_sched_resume(ptdev);
> > +			drm_dev_exit(cookie);
> > +		}
> > +
> > +		atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* Before we suspend, update the dummy_latest_flush page, so accesses
> > +	 * to this dummy page return the value the HW would have returned.
> > +	 */
> > +	*ptdev->pm.dummy_latest_flush = gpu_read(ptdev, CSF_GPU_LATEST_FLUSH_ID);  
> 
> As above, I don't believe it is important for user space to know the
> value the HW would have returned during a suspend. Indeed if the
> hardware was successfully suspended the flush ID is likely to be reset -
> so this would be inaccurate. However any value should be safe if the
> work was prepared while the GPU was off as the caches will be empty.

Agreed.

> 
> > +
> > +	clk_disable_unprepare(ptdev->clks.coregroup);
> > +	clk_disable_unprepare(ptdev->clks.stacks);
> > +	clk_disable_unprepare(ptdev->clks.core);
> > +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> > +
> > +out_unlock:
> > +	mutex_unlock(&ptdev->pm.lock);
> > +	return ret;
> > +}
> > +#endif
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> > new file mode 100644
> > index 000000000000..e0e1be263eb9
> > --- /dev/null
> > +++ b/drivers/gpu/drm/panthor/panthor_device.h
> > @@ -0,0 +1,354 @@
> > +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> > +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
> > +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
> > +/* Copyright 2023 Collabora ltd. */
> > +
> > +#ifndef __PANTHOR_DEVICE_H__
> > +#define __PANTHOR_DEVICE_H__
> > +
> > +#include <linux/atomic.h>
> > +#include <linux/io-pgtable.h>
> > +#include <linux/regulator/consumer.h>
> > +#include <linux/spinlock.h>
> > +#include <drm/drm_device.h>
> > +#include <drm/drm_mm.h>
> > +#include <drm/gpu_scheduler.h>
> > +#include <drm/panthor_drm.h>
> > +
> > +struct panthor_csf;
> > +struct panthor_csf_ctx;
> > +struct panthor_device;
> > +struct panthor_gpu;
> > +struct panthor_group_pool;
> > +struct panthor_heap_pool;
> > +struct panthor_job;
> > +struct panthor_mmu;
> > +struct panthor_fw;
> > +struct panthor_perfcnt;
> > +struct panthor_vm;
> > +struct panthor_vm_pool;
> > +
> > +/**
> > + * enum panthor_device_pm_state - PM state
> > + */
> > +enum panthor_device_pm_state {
> > +	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
> > +	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,
> > +
> > +	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
> > +	PANTHOR_DEVICE_PM_STATE_RESUMING,
> > +
> > +	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
> > +	PANTHOR_DEVICE_PM_STATE_ACTIVE,
> > +
> > +	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
> > +	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
> > +};
> > +
> > +/**
> > + * struct panthor_irq - IRQ data
> > + *
> > + * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
> > + */
> > +struct panthor_irq {
> > +	/** @ptdev: Panthor device */
> > +	struct panthor_device *ptdev;
> > +
> > +	/** @irq: IRQ number. */
> > +	int irq;
> > +
> > +	/** @mask: Current mask being applied to xxx_INT_MASK. */
> > +	u32 mask;
> > +
> > +	/** @suspended: Set to true when the IRQ is suspended. */
> > +	atomic_t suspended;
> > +};
> > +
> > +/**
> > + * struct panthor_device - Panthor device
> > + */
> > +struct panthor_device {
> > +	/** @base: Base drm_device. */
> > +	struct drm_device base;
> > +
> > +	/** @phys_addr: Physical address of the iomem region. */
> > +	phys_addr_t phys_addr;
> > +
> > +	/** @iomem: CPU mapping of the IOMEM region. */
> > +	void __iomem *iomem;
> > +
> > +	/** @clks: GPU clocks. */
> > +	struct {
> > +		/** @core: Core clock. */
> > +		struct clk *core;
> > +
> > +		/** @stacks: Stacks clock. This clock is optional. */
> > +		struct clk *stacks;
> > +
> > +		/** @coregroup: Core group clock. This clock is optional. */
> > +		struct clk *coregroup;
> > +	} clks;
> > +
> > +	/** @coherent: True if the CPU/GPU are memory coherent. */
> > +	bool coherent;
> > +
> > +	/** @gpu_info: GPU information. */
> > +	struct drm_panthor_gpu_info gpu_info;
> > +
> > +	/** @csif_info: Command stream interface information. */
> > +	struct drm_panthor_csif_info csif_info;
> > +
> > +	/** @gpu: GPU management data. */
> > +	struct panthor_gpu *gpu;
> > +
> > +	/** @fw: FW management data. */
> > +	struct panthor_fw *fw;
> > +
> > +	/** @mmu: MMU management data. */
> > +	struct panthor_mmu *mmu;
> > +
> > +	/** @scheduler: Scheduler management data. */
> > +	struct panthor_scheduler *scheduler;
> > +
> > +	/** @devfreq: Device frequency scaling management data. */
> > +	struct panthor_devfreq *devfreq;
> > +
> > +	/** @reset: Reset related fields. */
> > +	struct {
> > +		/** @wq: Ordered worqueud used to schedule reset operations. */
> > +		struct workqueue_struct *wq;
> > +
> > +		/** @work: Reset work. */
> > +		struct work_struct work;
> > +
> > +		/** @pending: Set to true if a reset is pending. */
> > +		atomic_t pending;
> > +	} reset;
> > +
> > +	/** @pm: Power management related data. */
> > +	struct {
> > +		/** @state: Power state, see panthor_device_pm_state. */
> > +		atomic_t state;
> > +
> > +		/**
> > +		 * @lock: Lock protecting the suspend/resume operations.
> > +		 *
> > +		 * This is needed to ensure we map the dummy IO pages when
> > +		 * the device is being suspended, and the real IO pages when
> > +		 * the device is being resumed. We can't just do with the
> > +		 * state atomicity to deal with this race.
> > +		 */
> > +		struct mutex lock;
> > +
> > +		/**
> > +		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
> > +		 *
> > +		 * Used to replace the real LATEST_FLUSH page when the GPU
> > +		 * is suspended.
> > +		 */
> > +		u32 *dummy_latest_flush;
> > +	} pm;
> > +};
> > +
> > +/**
> > + * struct panthor_file - Panthor file
> > + */
> > +struct panthor_file {
> > +	/** @ptdev: Device attached to this file. */
> > +	struct panthor_device *ptdev;
> > +
> > +	/** @vms: VM pool attached to this file. */
> > +	struct panthor_vm_pool *vms;
> > +
> > +	/** @groups: Scheduling group pool attached to this file. */
> > +	struct panthor_group_pool *groups;
> > +};
> > +
> > +int panthor_device_init(struct panthor_device *ptdev);
> > +void panthor_device_unplug(struct panthor_device *ptdev);
> > +
> > +/**
> > + * panthor_device_schedule_reset() - Schedules a reset operation
> > + */
> > +static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
> > +{
> > +	if (atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE &&  
> 
> As above - this is a racy check. Although it might be safe because of
> the cancel_work_sync() call in panthor_device_suspend(). But if we get
> rid of this check we don't need the atomic variable.

As mentioned above, I don't think cancel_work_sync() solves the race,
because the check might be done just before suspend, the thread paused
and resumed after cancel_work_sync() was called in the suspend path.

> 
> > +	    !atomic_cmpxchg(&ptdev->reset.pending, 0, 1))
> > +		queue_work(ptdev->reset.wq, &ptdev->reset.work);
> > +}
> > +
> > +/**
> > + * panthor_device_reset_is_pending() - Checks if a reset is pending.
> > + *
> > + * Return: true if a reset is pending, false otherwise.
> > + */
> > +static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
> > +{
> > +	return atomic_read(&ptdev->reset.pending) != 0;
> > +}
> > +
> > +int panthor_device_mmap_io(struct panthor_device *ptdev,
> > +			   struct vm_area_struct *vma);
> > +
> > +int panthor_device_resume(struct device *dev);
> > +int panthor_device_suspend(struct device *dev);
> > +
> > +enum drm_panthor_exception_type {
> > +	DRM_PANTHOR_EXCEPTION_OK = 0x00,
> > +	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
> > +	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
> > +	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
> > +	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
> > +	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
> > +	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
> > +	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
> > +	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
> > +	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
> > +	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
> > +	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
> > +	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
> > +	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
> > +	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
> > +	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
> > +	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
> > +	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
> > +	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
> > +	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
> > +	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
> > +	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
> > +	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
> > +	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
> > +	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
> > +	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
> > +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
> > +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
> > +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
> > +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
> > +	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
> > +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
> > +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
> > +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
> > +	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
> > +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
> > +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
> > +	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
> > +	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
> > +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
> > +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
> > +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
> > +	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
> > +};
> > +
> > +/**
> > + * panthor_exception_is_fault() - Checks if an exception is a fault.
> > + *
> > + * Return: true if the exception is a fault, false otherwise.
> > + */
> > +static inline bool
> > +panthor_exception_is_fault(u32 exception_code)
> > +{
> > +	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
> > +}
> > +
> > +const char *panthor_exception_name(struct panthor_device *ptdev,
> > +				   u32 exception_code);
> > +
> > +/**
> > + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
> > + * registration function.
> > + *
> > + * The boiler-plate to gracefully deal with shared interrupts is
> > + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
> > + * just after you actual handler. The handler prototype is:  
> s/you/your/ or probably s/you/the/ since we don't expect people to be
> adding more ;)
> 
> > + *
> > + * void (*handler)(struct panthor_device *, u32 status);
> > + */
> > +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
> > +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
> > +{												\
> > +	struct panthor_irq *pirq = data;							\
> > +	struct panthor_device *ptdev = pirq->ptdev;						\  
> 
> Maybe I'm missing something, but I was expecting a check here for if the
> irq has been suspended and to avoid the register reads if it was.

Thought the INT_MASK=0 + synchronize_irq() in panthor_xxx_irq_suspend()
would guarantee that the handler can't be called after
panthor_xxx_irq_suspend() was called.

> Otherwise I'm not entirely sure I follow what all this code is for.

Not entirely sure which code we're talking about. The reason we
don't use the default raw IRQ handler is because it doesn't work if the
irq line is shared. In that case, we need to mask all interrupts to
make sure other handlers on the same irq line don't get spammed with
our IRQs.

> 
> Steve
> 
> > +												\
> > +	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
> > +		return IRQ_NONE;								\
> > +												\
> > +	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
> > +	return IRQ_WAKE_THREAD;									\
> > +}												\
> > +												\
> > +static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
> > +{												\
> > +	struct panthor_irq *pirq = data;							\
> > +	struct panthor_device *ptdev = pirq->ptdev;						\
> > +	irqreturn_t ret = IRQ_NONE;								\
> > +												\
> > +	while (true) {										\
> > +		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
> > +												\
> > +		if (!status)									\
> > +			break;									\
> > +												\
> > +		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> > +												\
> > +		__handler(ptdev, status);							\
> > +		ret = IRQ_HANDLED;								\
> > +	}											\
> > +												\
> > +	if (!atomic_read(&pirq->suspended))							\
> > +		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
> > +												\
> > +	return ret;										\
> > +}												\
> > +												\
> > +static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
> > +{												\
> > +	int cookie;										\
> > +												\
> > +	atomic_set(&pirq->suspended, true);							\
> > +												\
> > +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> > +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
> > +		synchronize_irq(pirq->irq);							\
> > +		drm_dev_exit(cookie);								\
> > +	}											\
> > +												\
> > +	pirq->mask = 0;										\
> > +}												\
> > +												\
> > +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
> > +{												\
> > +	int cookie;										\
> > +												\
> > +	atomic_set(&pirq->suspended, false);							\
> > +	pirq->mask = mask;									\
> > +												\
> > +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> > +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
> > +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
> > +		drm_dev_exit(cookie);								\
> > +	}											\
> > +}												\
> > +												\
> > +static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
> > +					      struct panthor_irq *pirq,				\
> > +					      int irq, u32 mask)				\
> > +{												\
> > +	pirq->ptdev = ptdev;									\
> > +	pirq->irq = irq;									\
> > +	panthor_ ## __name ## _irq_resume(pirq, mask);						\
> > +												\
> > +	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
> > +					 panthor_ ## __name ## _irq_raw_handler,		\
> > +					 panthor_ ## __name ## _irq_threaded_handler,		\
> > +					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
> > +					 pirq);							\
> > +}
> > +
> > +extern struct workqueue_struct *panthor_cleanup_wq;
> > +
> > +#endif  
>
Steven Price Aug. 30, 2023, 1:17 p.m. UTC | #3
On 29/08/2023 15:00, Boris Brezillon wrote:
> On Fri, 11 Aug 2023 16:47:56 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>> On 09/08/2023 17:53, Boris Brezillon wrote:
>>> The panthor driver is designed in a modular way, where each logical
>>> block is dealing with a specific HW-block or software feature. In order
>>> for those blocks to communicate with each other, we need a central
>>> panthor_device collecting all the blocks, and exposing some common
>>> features, like interrupt handling, power management, reset, ...
>>>
>>> This what this panthor_device logical block is about.
>>>
>>> v2:
>>> - Rename the driver (pancsf -> panthor)
>>> - Change the license (GPL2 -> MIT + GPL2)
>>> - Split the driver addition commit
>>> - Add devfreq/PM support
>>> - Use drm_dev_{unplug,enter,exit}() to provide safe device removal
>>>
>>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>>> ---
>>>  drivers/gpu/drm/panthor/panthor_device.c | 479 +++++++++++++++++++++++
>>>  drivers/gpu/drm/panthor/panthor_device.h | 354 +++++++++++++++++
>>>  2 files changed, 833 insertions(+)
>>>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.c
>>>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.h
>>>
>>> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
>>> new file mode 100644
>>> index 000000000000..15f102116fa0
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/panthor/panthor_device.c
>>> @@ -0,0 +1,479 @@
>>> +// SPDX-License-Identifier: GPL-2.0 or MIT
>>> +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
>>> +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
>>> +/* Copyright 2023 Collabora ltd. */
>>> +
>>> +#include <linux/clk.h>
>>> +#include <linux/reset.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/pm_domain.h>
>>> +#include <linux/pm_runtime.h>
>>> +#include <linux/regulator/consumer.h>
>>> +
>>> +#include <drm/drm_drv.h>
>>> +#include <drm/drm_managed.h>
>>> +
>>> +#include "panthor_sched.h"
>>> +#include "panthor_device.h"
>>> +#include "panthor_devfreq.h"
>>> +#include "panthor_gpu.h"
>>> +#include "panthor_fw.h"
>>> +#include "panthor_mmu.h"
>>> +#include "panthor_regs.h"
>>> +
>>> +static int panthor_clk_init(struct panthor_device *ptdev)
>>> +{
>>> +	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
>>> +	if (IS_ERR(ptdev->clks.core)) {
>>> +		drm_err(&ptdev->base, "get 'core' clock failed %ld\n",
>>> +			PTR_ERR(ptdev->clks.core));  
>>
>> I suspect it would be a good idea to use dev_err_probe() here (and
>> below) as I believe devm_clk_get can return -EPROBE_DEFER.
> 
> Nice, didn't know there was a logging function that was silencing
> probe-defer errors.
> 
>>
>>> +		return PTR_ERR(ptdev->clks.core);
>>> +	}
>>> +
>>> +	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
>>> +	if (IS_ERR(ptdev->clks.stacks)) {
>>> +		drm_err(&ptdev->base, "get 'stacks' clock failed %ld\n",
>>> +			PTR_ERR(ptdev->clks.stacks));
>>> +		return PTR_ERR(ptdev->clks.stacks);
>>> +	}
>>> +
>>> +	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
>>> +	if (IS_ERR(ptdev->clks.coregroup)) {
>>> +		drm_err(&ptdev->base, "get 'coregroup' clock failed %ld\n",
>>> +			PTR_ERR(ptdev->clks.coregroup));
>>> +		return PTR_ERR(ptdev->clks.coregroup);
>>> +	}
>>> +
>>> +	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
>>> +	return 0;
>>> +}
>>> +
>>> +void panthor_device_unplug(struct panthor_device *ptdev)
>>> +{
>>> +	/* FIXME: This is racy. */  
>>
>> Can we fix this? From a quick look it seems like a sequence like below
>> should avoid the race.
>>
>> 	if (!drm_dev_enter())
>> 		/* Already unplugged */
>> 		return;
>> 	ptdev->base.unplugged = true;
>> 	drm_dev_exit();
>>
>> Although possibly that should be in the DRM core rather than open-coded
>> here.
> 
> Are you sure that's protecting us against two concurrent calls to
> drm_dev_unplug() (drm_dev_enter() is taking a read-lock)?

Well now I'm not sure ;) This was based on the implementations of
drm_dev_is_unplugged() and drm_dev_unplug(). drm_dev_is_unplugged()
simply tries to enter then exit. drm_dev_unplug() sets dev->unplugged
(without first taking any locks). So my naïve combination resulted in
the above.

The part I was missing is the synchronize_srcu() call in
drm_dev_unplug() is what matches up with the read lock in drm_dev_enter().

> And that's not
> the only thing I need actually. If there are 2 threads entering
> panthor_device_unplug(), I need to make sure the one who losts (arrived
> after unplugged was set to false) is waiting for all operations after
> the drm_dev_unplug() call to be done, otherwise we might return from
> platform_driver->remove() before the unplug cleanups are done, and
> there might still be threads/workqueues accessing device resources
> while/after they get released by the device-model.

I can't figure out how to do this other than adding a new atomic status
bit into panthor. So something like:

	if (!drm_dev_enter())
		/* Already unplugged */
		return;

	if (atomic_cmpxchg(&unplugging, false, true)) {
		/* Racing with another thread */
		drm_dev_exit();
		/* Wait for other threads to exit */
		synchronize_srcu(&drm_unplug_srcu);
		return;
	}

	panthor_xxx_unplug()

	drm_dev_exit();

Or at least I think that might work. The need to synchronize with
drm_unplug_srcu means this really needs a new helper in drm_drv.c.

>>
>>> +	if (drm_dev_is_unplugged(&ptdev->base))
>>> +		return;
>>> +
>>> +	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
>>> +
>>> +	/* Call drm_dev_unplug() so any access to HW block happening after
>>> +	 * that point get rejected.
>>> +	 */
>>> +	drm_dev_unplug(&ptdev->base);
>>> +
>>> +	/* Now, try to cleanly shutdown the GPU before the device resources
>>> +	 * get reclaimed.
>>> +	 */
>>> +	panthor_sched_unplug(ptdev);
>>> +	panthor_fw_unplug(ptdev);
>>> +	panthor_mmu_unplug(ptdev);
>>> +	panthor_gpu_unplug(ptdev);
>>> +
>>> +	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
>>> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
>>> +}
>>> +
>>> +static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
>>> +{
>>> +	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
>>> +
>>> +	cancel_work_sync(&ptdev->reset.work);
>>> +	destroy_workqueue(ptdev->reset.wq);
>>> +}
>>> +
>>> +static void panthor_device_reset_work(struct work_struct *work)
>>> +{
>>> +	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
>>> +	int ret, cookie;
>>> +
>>> +	if (!drm_dev_enter(&ptdev->base, &cookie))
>>> +		return;
>>> +
>>> +	panthor_sched_pre_reset(ptdev);
>>> +	panthor_fw_pre_reset(ptdev, true);
>>> +	panthor_mmu_pre_reset(ptdev);
>>> +	panthor_gpu_soft_reset(ptdev);
>>> +	panthor_gpu_l2_power_on(ptdev);
>>> +	panthor_mmu_post_reset(ptdev);
>>> +	ret = panthor_fw_post_reset(ptdev);
>>> +	if (ret)
>>> +		goto out;
>>> +
>>> +	atomic_set(&ptdev->reset.pending, 0);
>>> +	panthor_sched_post_reset(ptdev);
>>> +	drm_dev_exit(cookie);
>>> +
>>> +out:
>>> +	if (ret) {  
>>
>> This looks like a race condition too - is there a need for a
>> drm_dev_exit_and_unplug() function?
> 
> drm_dev_exit() is just releasing the read-lock. drm_dev_unplug()
> waits for all readers to be done and sets the unplugged value to true.
> So we only get readers/writer synchronization here, but nothing doing
> writer/writer sync. I guess the drm core leaves that to drivers, given
> drm_dev_unplug() is usually called from xxx_driver->remove() hook, on
> which serialization is guaranteed by the device-model.
> 
> TLDR; yes, it's racy, but I don't think drm_dev_exit_and_unplug() would
> help solve the existing race.

Yeah, I hadn't really thought through the reader/writer locks.

> It's worth noting that we currently have only 2 paths calling
> panthor_device_unplug(): the platform_driver->remove() hook and the
> reset worker. Calling drm_dev_unplug() might not be the right thing to
> do, I just thought it was a good match to reflect the fact the device
> becomes inaccessible, without adding yet another kind of device-lost
> field.

I quite liked the unplugged approach, it hides the complexities of the
GPU breaking nicely.

However I do think this path needs fixing in some way, because of the
"goto out" we end up calling panthor_device_unplug() while in the
drm_dev_enter() section. Which, unless I'm mistaken, means
panthor_device_unplug() will call drm_dev_unplug() in that section -
which should produce a lockdep warning at the very least, if not an
actual deadlock.

Given it's only a read lock - I think simply moving drm_dev_exit() below
the "out:" label fixes the deadlock without making any races worse.
Whether the race here actually matters I'm not sure.

>>
>>> +		panthor_device_unplug(ptdev);
>>> +		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
>>> +	}
>>> +}
>>> +
>>> +static bool panthor_device_is_initialized(struct panthor_device *ptdev)
>>> +{
>>> +	return !!ptdev->scheduler;
>>> +}
>>> +
>>> +static void panthor_device_free_page(struct drm_device *ddev, void *data)
>>> +{
>>> +	free_page((unsigned long)data);
>>> +}
>>> +
>>> +int panthor_device_init(struct panthor_device *ptdev)
>>> +{
>>> +	struct resource *res;
>>> +	struct page *p;
>>> +	int ret;
>>> +
>>> +	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
>>> +
>>> +	drmm_mutex_init(&ptdev->base, &ptdev->pm.lock);
>>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
>>> +	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
>>> +	if (!p)
>>> +		return -ENOMEM;
>>> +
>>> +	ptdev->pm.dummy_latest_flush = page_address(p);
>>> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
>>> +				       ptdev->pm.dummy_latest_flush);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	/* Set the dummy page to the default LATEST_FLUSH value. This
>>> +	 * will be updated on the next suspend.
>>> +	 */
>>> +	*ptdev->pm.dummy_latest_flush = CSF_GPU_LATEST_FLUSH_ID_DEFAULT;  
>>
>> I see why this register default value was defined. Although I'm not sure
>> it has any benefit over just using zero... If the GPU is off when user
>> space reads the FLUSH_ID then the GPU's caches are definitely empty so
>> any flush ID is valid.
> 
> Zero means we'll force a cache flush for all CS that were created while
> the device was suspended, that's not ideal.
> 
>>
>> Interestingly looking at kbase it seems to use an initial value of 1
>> (POWER_DOWN_LATEST_FLUSH_VALUE). I guess zero is less ideal because
>> FLUSH_CACHE2 would then unconditionally do a flush.
> 
> I guess a value of 1 would work. It just means we'll get a spurious
> flush if the CS is submitted after 32 flushes happened, on the other
> hand we also a spurious flush on the first submitted CS when we use
> POWER_DOWN_LATEST_FLUSH_VALUE. I'll switch to 1, drop the default def,
> and update the comment accordingly.

Yeah, matching kbase is almost certainly the safest approach ;) Sorry, I
was reviewing the patches mostly in order and this looked really odd
until I started digging into it. Zero is clearly not the ideal value,
but the reset value is also just a weird value for hardware validation
(it enables easier checking of the wrap condition). Since kbase picks 1,
that must be a value which works well!

>>
>>> +
>>> +	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
>>> +	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
>>> +	if (!ptdev->reset.wq)
>>> +		return -ENOMEM;
>>> +
>>> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ret = panthor_clk_init(ptdev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ret = panthor_devfreq_init(ptdev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
>>> +							      0, &res);
>>> +	if (IS_ERR(ptdev->iomem))
>>> +		return PTR_ERR(ptdev->iomem);
>>> +
>>> +	ptdev->phys_addr = res->start;
>>> +
>>> +	ret = devm_pm_runtime_enable(ptdev->base.dev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ret = pm_runtime_resume_and_get(ptdev->base.dev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ret = panthor_gpu_init(ptdev);
>>> +	if (ret)
>>> +		goto err_rpm_put;
>>> +
>>> +	ret = panthor_mmu_init(ptdev);
>>> +	if (ret)
>>> +		goto err_rpm_put;
>>> +
>>> +	ret = panthor_fw_init(ptdev);
>>> +	if (ret)
>>> +		goto err_rpm_put;
>>> +
>>> +	ret = panthor_sched_init(ptdev);
>>> +	if (ret)
>>> +		goto err_rpm_put;
>>> +
>>> +	/* ~3 frames */
>>> +	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
>>> +	pm_runtime_use_autosuspend(ptdev->base.dev);
>>> +	pm_runtime_put_autosuspend(ptdev->base.dev);
>>> +	return 0;
>>> +
>>> +err_rpm_put:
>>> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
>>> +	return ret;
>>> +}
>>> +
>>> +#define PANTHOR_EXCEPTION(id) \
>>> +	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
>>> +		.name = #id, \
>>> +	}
>>> +
>>> +struct panthor_exception_info {
>>> +	const char *name;
>>> +};
>>> +
>>> +static const struct panthor_exception_info panthor_exception_infos[] = {
>>> +	PANTHOR_EXCEPTION(OK),
>>> +	PANTHOR_EXCEPTION(TERMINATED),
>>> +	PANTHOR_EXCEPTION(KABOOM),
>>> +	PANTHOR_EXCEPTION(EUREKA),
>>> +	PANTHOR_EXCEPTION(ACTIVE),
>>> +	PANTHOR_EXCEPTION(CS_RES_TERM),
>>> +	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
>>> +	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
>>> +	PANTHOR_EXCEPTION(CS_BUS_FAULT),
>>> +	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
>>> +	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
>>> +	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
>>> +	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
>>> +	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
>>> +	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
>>> +	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
>>> +	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
>>> +	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
>>> +	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
>>> +	PANTHOR_EXCEPTION(OOM),
>>> +	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
>>> +	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
>>> +	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
>>> +	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
>>> +	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
>>> +	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
>>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
>>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
>>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
>>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
>>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
>>> +	PANTHOR_EXCEPTION(PERM_FAULT_0),
>>> +	PANTHOR_EXCEPTION(PERM_FAULT_1),
>>> +	PANTHOR_EXCEPTION(PERM_FAULT_2),
>>> +	PANTHOR_EXCEPTION(PERM_FAULT_3),
>>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
>>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
>>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
>>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
>>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
>>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
>>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
>>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
>>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
>>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
>>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
>>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
>>> +};
>>> +
>>> +const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
>>> +{
>>> +	if (drm_WARN_ON(&ptdev->base,  
>>
>> I'm not convinced this should be a WARN_ON as I suspect it's probably
>> possible to inject values from user space (although I'm not completely
>> sure on that).
> 
> Normally no (it's something returned by the FW), unless userspace gets
> access to the kernel <-> FW interface, which would be worrisome :-).

I've no idea if it's actually possible, but it feels like it should be
possible to create a firmware synchronisation object with an error code
chosen by the user and possibly then propagate that error code back to
the kernel. It's certainly not trivial though. Either way the WARN is
unnecessary.

>> It's certainly not a driver error as such if we can't
>> decode the value.
> 
> Ack on dropping the WARN_ON().
> 
>>
>>> +			exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
>>> +			!panthor_exception_infos[exception_code].name))
>>> +		return "Unknown exception type";
>>> +
>>> +	return panthor_exception_infos[exception_code].name;
>>> +}
>>> +
>>> +static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
>>> +{
>>> +	struct vm_area_struct *vma = vmf->vma;
>>> +	struct panthor_device *ptdev = vma->vm_private_data;
>>> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
>>> +	unsigned long pfn;
>>> +	pgprot_t pgprot;
>>> +	vm_fault_t ret;
>>> +	bool active;
>>> +	int cookie;
>>> +
>>> +	if (!drm_dev_enter(&ptdev->base, &cookie))
>>> +		return VM_FAULT_SIGBUS;
>>> +
>>> +	mutex_lock(&ptdev->pm.lock);
>>> +	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
>>> +
>>> +	switch (id) {
>>> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
>>> +		if (active)
>>> +			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
>>> +		else
>>> +			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
>>> +		break;
>>> +
>>> +	default:
>>> +		ret = VM_FAULT_SIGBUS;
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	pgprot = vma->vm_page_prot;
>>> +	if (active)
>>> +		pgprot = pgprot_noncached(pgprot);
>>> +
>>> +	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
>>> +
>>> +out_unlock:
>>> +	mutex_unlock(&ptdev->pm.lock);
>>> +	drm_dev_exit(cookie);
>>> +	return ret;
>>> +}
>>> +
>>> +static const struct vm_operations_struct panthor_mmio_vm_ops = {
>>> +	.fault = panthor_mmio_vm_fault,
>>> +};
>>> +
>>> +int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
>>> +{
>>> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
>>> +
>>> +	switch (id) {
>>> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
>>> +		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
>>> +		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
>>> +			return -EINVAL;
>>> +
>>> +		break;
>>> +
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	/* Defer actual mapping to the fault handler. */
>>> +	vma->vm_private_data = ptdev;
>>> +	vma->vm_ops = &panthor_mmio_vm_ops;
>>> +	vm_flags_set(vma,
>>> +		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
>>> +		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
>>> +	return 0;
>>> +}
>>> +
>>> +#ifdef CONFIG_PM
>>> +int panthor_device_resume(struct device *dev)
>>> +{
>>> +	struct panthor_device *ptdev = dev_get_drvdata(dev);
>>> +	int ret, cookie;
>>> +
>>> +	mutex_lock(&ptdev->pm.lock);
>>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
>>> +
>>> +	ret = clk_prepare_enable(ptdev->clks.core);
>>> +	if (ret)
>>> +		goto err_unlock;
>>> +
>>> +	ret = clk_prepare_enable(ptdev->clks.stacks);
>>> +	if (ret)
>>> +		goto err_disable_core_clk;
>>> +
>>> +	ret = clk_prepare_enable(ptdev->clks.coregroup);
>>> +	if (ret)
>>> +		goto err_disable_stacks_clk;
>>> +
>>> +	ret = panthor_devfreq_resume(ptdev);
>>> +	if (ret)
>>> +		goto err_disable_coregroup_clk;
>>> +
>>> +	if (panthor_device_is_initialized(ptdev) &&
>>> +	    drm_dev_enter(&ptdev->base, &cookie)) {
>>> +		panthor_gpu_resume(ptdev);
>>> +		panthor_mmu_resume(ptdev);
>>> +		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
>>> +		if (!ret)
>>> +			panthor_sched_resume(ptdev);
>>> +
>>> +		drm_dev_exit(cookie);
>>> +
>>> +		if (ret)
>>> +			goto err_devfreq_suspend;
>>> +	}
>>> +
>>> +	/* Clear all IOMEM mappings pointing to this device after we've
>>> +	 * resumed. This way the fake mappings pointing to the dummy pages
>>> +	 * are removed and the real iomem mapping will be restored on next
>>> +	 * access.
>>> +	 */
>>> +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
>>> +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
>>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);  
>>
>> Is the ordering here correct? I think we need to set ACTIVE before the
>> unmap_mapping_range otherwise there is a (very small) race where user
>> space could fault the page (and get the dummy mapping) before the
>> atomic_set.
> 
> We take the pm.lock in panthor_mmio_vm_fault().
> 
>>
>> Hmm, actually we have the pm.lock, so no this isn't racy. In which case
>> is there a good reason that you're using atomics? I can see two accesses
>> which aren't protected by pm.lock:
>>
>>   * the early out in panthor_device_suspend() - which could easily be
>> moved inside the lock.
> 
> When we're in suspend() we are the one in control of the pm.state, so
> no race expected here.
> 
>>
>>   * panthor_device_schedule_reset() - this looks racy (the power down
>> could happen immediately after the atomic_read()), so I suspect it would
>> be better moving the check into panthor_device_reset_work() and
>> performing it with the pm.lock held.
> 
> I think the main reason for it being an atomic is because I didn't
> have PM locking in the initial implementation, but I ended adding
> locking at some point because I didn't really have choice. I thought
> the race didn't exist because of the workqueue synchronization/work
> cancellation that happens in panthor_sched_suspend(), but I see now
> that it's not protecting us (thread queuing the job could be paused
> just after checking the PM state and resumed after the suspend
> happened). This being said, we might have a lock ordering issue if we
> take the PM lock in that path (I need to check that).

Yeah I didn't bother to check whether it would create ordering issues...
;) I'll leave you to figure out the fix - the whole atomic + mutex was
confusing and doesn't seem to have quite worked.

[...]

>>> +
>>> +/**
>>> + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
>>> + * registration function.
>>> + *
>>> + * The boiler-plate to gracefully deal with shared interrupts is
>>> + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
>>> + * just after you actual handler. The handler prototype is:  
>> s/you/your/ or probably s/you/the/ since we don't expect people to be
>> adding more ;)
>>
>>> + *
>>> + * void (*handler)(struct panthor_device *, u32 status);
>>> + */
>>> +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
>>> +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
>>> +{												\
>>> +	struct panthor_irq *pirq = data;							\
>>> +	struct panthor_device *ptdev = pirq->ptdev;						\  
>>
>> Maybe I'm missing something, but I was expecting a check here for if the
>> irq has been suspended and to avoid the register reads if it was.
> 
> Thought the INT_MASK=0 + synchronize_irq() in panthor_xxx_irq_suspend()
> would guarantee that the handler can't be called after
> panthor_xxx_irq_suspend() was called.

If the IRQ is shared then Linux doesn't know which device caused the
interrupt, so another device's (shared) interrupt could cause our
handler to be run.

>> Otherwise I'm not entirely sure I follow what all this code is for.
> 
> Not entirely sure which code we're talking about. The reason we
> don't use the default raw IRQ handler is because it doesn't work if the
> irq line is shared. In that case, we need to mask all interrupts to
> make sure other handlers on the same irq line don't get spammed with
> our IRQs.

What I'm not following is why we need all this extra infrastructure for
IRQs. The 'setting the mask to 0' during suspend is simple enough and
could be included in code which now calls panthor_xxx_irq_suspend()
(equally for restoring the mask on resume). But there's a loads more
code here.

My initial thought when I looked at this was that you were trying to
solve the issue of a shared IRQ where Mali might get powered off, but
the IRQ is then triggered by another device. In that case touching the
Mali registers would be problematic, so I was expecting some code in
_irq_raw_handler() to check whether the IRQ couldn't possibly be for us
(i.e. mask==0) and early out with IRQ_NONE. kbase has a concept like
this "gpu_powered" for exactly this reason.

But I can't see anything in the code to handle that case. And the
"spamming" of other drivers during suspend shouldn't really happen
(there's something odd going on if the hardware is generating interrupts
when it's meant to be suspended).

But maybe I'm just missing something - it's a while since I've dealt
with interrupt code in Linux.

Steve

>>
>> Steve
>>
>>> +												\
>>> +	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
>>> +		return IRQ_NONE;								\
>>> +												\
>>> +	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
>>> +	return IRQ_WAKE_THREAD;									\
>>> +}												\
>>> +												\
>>> +static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
>>> +{												\
>>> +	struct panthor_irq *pirq = data;							\
>>> +	struct panthor_device *ptdev = pirq->ptdev;						\
>>> +	irqreturn_t ret = IRQ_NONE;								\
>>> +												\
>>> +	while (true) {										\
>>> +		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
>>> +												\
>>> +		if (!status)									\
>>> +			break;									\
>>> +												\
>>> +		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
>>> +												\
>>> +		__handler(ptdev, status);							\
>>> +		ret = IRQ_HANDLED;								\
>>> +	}											\
>>> +												\
>>> +	if (!atomic_read(&pirq->suspended))							\
>>> +		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
>>> +												\
>>> +	return ret;										\
>>> +}												\
>>> +												\
>>> +static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
>>> +{												\
>>> +	int cookie;										\
>>> +												\
>>> +	atomic_set(&pirq->suspended, true);							\
>>> +												\
>>> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
>>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
>>> +		synchronize_irq(pirq->irq);							\
>>> +		drm_dev_exit(cookie);								\
>>> +	}											\
>>> +												\
>>> +	pirq->mask = 0;										\
>>> +}												\
>>> +												\
>>> +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
>>> +{												\
>>> +	int cookie;										\
>>> +												\
>>> +	atomic_set(&pirq->suspended, false);							\
>>> +	pirq->mask = mask;									\
>>> +												\
>>> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
>>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
>>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
>>> +		drm_dev_exit(cookie);								\
>>> +	}											\
>>> +}												\
>>> +												\
>>> +static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
>>> +					      struct panthor_irq *pirq,				\
>>> +					      int irq, u32 mask)				\
>>> +{												\
>>> +	pirq->ptdev = ptdev;									\
>>> +	pirq->irq = irq;									\
>>> +	panthor_ ## __name ## _irq_resume(pirq, mask);						\
>>> +												\
>>> +	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
>>> +					 panthor_ ## __name ## _irq_raw_handler,		\
>>> +					 panthor_ ## __name ## _irq_threaded_handler,		\
>>> +					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
>>> +					 pirq);							\
>>> +}
>>> +
>>> +extern struct workqueue_struct *panthor_cleanup_wq;
>>> +
>>> +#endif  
>>
>
Boris Brezillon Aug. 30, 2023, 2:06 p.m. UTC | #4
On Wed, 30 Aug 2023 14:17:57 +0100
Steven Price <steven.price@arm.com> wrote:

> >>> +static void panthor_device_reset_work(struct work_struct *work)
> >>> +{
> >>> +	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
> >>> +	int ret, cookie;
> >>> +
> >>> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> >>> +		return;
> >>> +
> >>> +	panthor_sched_pre_reset(ptdev);
> >>> +	panthor_fw_pre_reset(ptdev, true);
> >>> +	panthor_mmu_pre_reset(ptdev);
> >>> +	panthor_gpu_soft_reset(ptdev);
> >>> +	panthor_gpu_l2_power_on(ptdev);
> >>> +	panthor_mmu_post_reset(ptdev);
> >>> +	ret = panthor_fw_post_reset(ptdev);
> >>> +	if (ret)
> >>> +		goto out;
> >>> +
> >>> +	atomic_set(&ptdev->reset.pending, 0);
> >>> +	panthor_sched_post_reset(ptdev);
> >>> +	drm_dev_exit(cookie);
> >>> +
> >>> +out:
> >>> +	if (ret) {    
> >>
> >> This looks like a race condition too - is there a need for a
> >> drm_dev_exit_and_unplug() function?  
> > 
> > drm_dev_exit() is just releasing the read-lock. drm_dev_unplug()
> > waits for all readers to be done and sets the unplugged value to true.
> > So we only get readers/writer synchronization here, but nothing doing
> > writer/writer sync. I guess the drm core leaves that to drivers, given
> > drm_dev_unplug() is usually called from xxx_driver->remove() hook, on
> > which serialization is guaranteed by the device-model.
> > 
> > TLDR; yes, it's racy, but I don't think drm_dev_exit_and_unplug() would
> > help solve the existing race.  
> 
> Yeah, I hadn't really thought through the reader/writer locks.
> 
> > It's worth noting that we currently have only 2 paths calling
> > panthor_device_unplug(): the platform_driver->remove() hook and the
> > reset worker. Calling drm_dev_unplug() might not be the right thing to
> > do, I just thought it was a good match to reflect the fact the device
> > becomes inaccessible, without adding yet another kind of device-lost
> > field.  
> 
> I quite liked the unplugged approach, it hides the complexities of the
> GPU breaking nicely.
> 
> However I do think this path needs fixing in some way, because of the
> "goto out" we end up calling panthor_device_unplug() while in the
> drm_dev_enter() section. Which, unless I'm mistaken, means
> panthor_device_unplug() will call drm_dev_unplug() in that section -
> which should produce a lockdep warning at the very least, if not an
> actual deadlock.
> 
> Given it's only a read lock - I think simply moving drm_dev_exit() below
> the "out:" label fixes the deadlock without making any races worse.

Oh, yeah, I didn't realize this is what you were complaining about. We
definitely need to move the out label before drm_dev_exit().

> Whether the race here actually matters I'm not sure.

It does if we want to be safe against removal. Maybe what we should do
instead is synchronize the reset work in the platform->remove() path,
and make sure it can't be scheduled after the synchronization happened.
This way we don't have to worry about concurrent calls to
panthor_device_unplug(), and we can keep the existing is_unplugged
check.


> >>> +
> >>> +/**
> >>> + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
> >>> + * registration function.
> >>> + *
> >>> + * The boiler-plate to gracefully deal with shared interrupts is
> >>> + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
> >>> + * just after you actual handler. The handler prototype is:    
> >> s/you/your/ or probably s/you/the/ since we don't expect people to be
> >> adding more ;)
> >>  
> >>> + *
> >>> + * void (*handler)(struct panthor_device *, u32 status);
> >>> + */
> >>> +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
> >>> +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
> >>> +{												\
> >>> +	struct panthor_irq *pirq = data;							\
> >>> +	struct panthor_device *ptdev = pirq->ptdev;						\    
> >>
> >> Maybe I'm missing something, but I was expecting a check here for if the
> >> irq has been suspended and to avoid the register reads if it was.  
> > 
> > Thought the INT_MASK=0 + synchronize_irq() in panthor_xxx_irq_suspend()
> > would guarantee that the handler can't be called after
> > panthor_xxx_irq_suspend() was called.  
> 
> If the IRQ is shared then Linux doesn't know which device caused the
> interrupt, so another device's (shared) interrupt could cause our
> handler to be run.

Uh, that's correct. We definitely need to check the ->suspended value
before reading the register...

> 
> >> Otherwise I'm not entirely sure I follow what all this code is for.  
> > 
> > Not entirely sure which code we're talking about. The reason we
> > don't use the default raw IRQ handler is because it doesn't work if the
> > irq line is shared. In that case, we need to mask all interrupts to
> > make sure other handlers on the same irq line don't get spammed with
> > our IRQs.  
> 
> What I'm not following is why we need all this extra infrastructure for
> IRQs. The 'setting the mask to 0' during suspend is simple enough and
> could be included in code which now calls panthor_xxx_irq_suspend()
> (equally for restoring the mask on resume). But there's a loads more
> code here.

It's not just setting the mask to 0, but also making sure all pending
interrupts have been processed, otherwise we might have our threaded
handler called after we've supposedly suspended the IRQ (set _INT_MASK
to 0), which might trigger access to HW after it's been suspended. It's
pretty easy to forget to do things in the suspend/resume path, even if
those things are supposed to be trivial/obvious (I learnt it the hard
way). By having helpers, we reduce the risk of doing silly mistakes,
and we control that in a central place.

> 
> My initial thought when I looked at this was that you were trying to
> solve the issue of a shared IRQ where Mali might get powered off, but
> the IRQ is then triggered by another device. In that case touching the
> Mali registers would be problematic, so I was expecting some code in
> _irq_raw_handler() to check whether the IRQ couldn't possibly be for us
> (i.e. mask==0) and early out with IRQ_NONE. kbase has a concept like
> this "gpu_powered" for exactly this reason.

Yeah, that was one the goals, just didn't really think of that
case where the IRQ line is shared by different devices, not just the
GPU-related irqs being muxed on one line. In this case, the regs
are still accessible until the clk driving the APB interface is
disabled, which happens when all GPU irqs have been suspended.

> 
> But I can't see anything in the code to handle that case. And the
> "spamming" of other drivers during suspend shouldn't really happen
> (there's something odd going on if the hardware is generating interrupts
> when it's meant to be suspended).

The masking is here to make sure we don't receive interrupts after
calling synchronize_irq(). After that point, we just want to ignore any
IRQs until the device is resumed. As for spamming other drivers or
other GPU components, it can still happen between the moment we suspend
the IRQ, and the moment the device is actually shut down.

> 
> But maybe I'm just missing something - it's a while since I've dealt
> with interrupt code in Linux.

No, you're not missing anything, it's really just about irq
synchronization in the suspend/resume path, which might appear trivial
to you, but is very easy to get wrong in subtle ways.
Liviu Dudau Sept. 4, 2023, 11:46 a.m. UTC | #5
On Wed, Aug 30, 2023 at 02:17:57PM +0100, Steven Price wrote:
> On 29/08/2023 15:00, Boris Brezillon wrote:
> > On Fri, 11 Aug 2023 16:47:56 +0100
> > Steven Price <steven.price@arm.com> wrote:
> > 
> >> On 09/08/2023 17:53, Boris Brezillon wrote:
> >>> The panthor driver is designed in a modular way, where each logical
> >>> block is dealing with a specific HW-block or software feature. In order
> >>> for those blocks to communicate with each other, we need a central
> >>> panthor_device collecting all the blocks, and exposing some common
> >>> features, like interrupt handling, power management, reset, ...
> >>>
> >>> This what this panthor_device logical block is about.
> >>>
> >>> v2:
> >>> - Rename the driver (pancsf -> panthor)
> >>> - Change the license (GPL2 -> MIT + GPL2)
> >>> - Split the driver addition commit
> >>> - Add devfreq/PM support
> >>> - Use drm_dev_{unplug,enter,exit}() to provide safe device removal
> >>>
> >>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> >>> ---
> >>>  drivers/gpu/drm/panthor/panthor_device.c | 479 +++++++++++++++++++++++
> >>>  drivers/gpu/drm/panthor/panthor_device.h | 354 +++++++++++++++++
> >>>  2 files changed, 833 insertions(+)
> >>>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.c
> >>>  create mode 100644 drivers/gpu/drm/panthor/panthor_device.h
> >>>
> >>> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> >>> new file mode 100644
> >>> index 000000000000..15f102116fa0
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> >>> @@ -0,0 +1,479 @@
> >>> +// SPDX-License-Identifier: GPL-2.0 or MIT
> >>> +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
> >>> +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
> >>> +/* Copyright 2023 Collabora ltd. */
> >>> +
> >>> +#include <linux/clk.h>
> >>> +#include <linux/reset.h>
> >>> +#include <linux/platform_device.h>
> >>> +#include <linux/pm_domain.h>
> >>> +#include <linux/pm_runtime.h>
> >>> +#include <linux/regulator/consumer.h>
> >>> +
> >>> +#include <drm/drm_drv.h>
> >>> +#include <drm/drm_managed.h>
> >>> +
> >>> +#include "panthor_sched.h"
> >>> +#include "panthor_device.h"
> >>> +#include "panthor_devfreq.h"
> >>> +#include "panthor_gpu.h"
> >>> +#include "panthor_fw.h"
> >>> +#include "panthor_mmu.h"
> >>> +#include "panthor_regs.h"
> >>> +
> >>> +static int panthor_clk_init(struct panthor_device *ptdev)
> >>> +{
> >>> +	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
> >>> +	if (IS_ERR(ptdev->clks.core)) {
> >>> +		drm_err(&ptdev->base, "get 'core' clock failed %ld\n",
> >>> +			PTR_ERR(ptdev->clks.core));  
> >>
> >> I suspect it would be a good idea to use dev_err_probe() here (and
> >> below) as I believe devm_clk_get can return -EPROBE_DEFER.
> > 
> > Nice, didn't know there was a logging function that was silencing
> > probe-defer errors.
> > 
> >>
> >>> +		return PTR_ERR(ptdev->clks.core);
> >>> +	}
> >>> +
> >>> +	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
> >>> +	if (IS_ERR(ptdev->clks.stacks)) {
> >>> +		drm_err(&ptdev->base, "get 'stacks' clock failed %ld\n",
> >>> +			PTR_ERR(ptdev->clks.stacks));
> >>> +		return PTR_ERR(ptdev->clks.stacks);
> >>> +	}
> >>> +
> >>> +	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
> >>> +	if (IS_ERR(ptdev->clks.coregroup)) {
> >>> +		drm_err(&ptdev->base, "get 'coregroup' clock failed %ld\n",
> >>> +			PTR_ERR(ptdev->clks.coregroup));
> >>> +		return PTR_ERR(ptdev->clks.coregroup);
> >>> +	}
> >>> +
> >>> +	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +void panthor_device_unplug(struct panthor_device *ptdev)
> >>> +{
> >>> +	/* FIXME: This is racy. */  
> >>
> >> Can we fix this? From a quick look it seems like a sequence like below
> >> should avoid the race.
> >>
> >> 	if (!drm_dev_enter())
> >> 		/* Already unplugged */
> >> 		return;
> >> 	ptdev->base.unplugged = true;
> >> 	drm_dev_exit();
> >>
> >> Although possibly that should be in the DRM core rather than open-coded
> >> here.
> > 
> > Are you sure that's protecting us against two concurrent calls to
> > drm_dev_unplug() (drm_dev_enter() is taking a read-lock)?
> 
> Well now I'm not sure ;) This was based on the implementations of
> drm_dev_is_unplugged() and drm_dev_unplug(). drm_dev_is_unplugged()
> simply tries to enter then exit. drm_dev_unplug() sets dev->unplugged
> (without first taking any locks). So my naïve combination resulted in
> the above.
> 
> The part I was missing is the synchronize_srcu() call in
> drm_dev_unplug() is what matches up with the read lock in drm_dev_enter().
> 
> > And that's not
> > the only thing I need actually. If there are 2 threads entering
> > panthor_device_unplug(), I need to make sure the one who losts (arrived
> > after unplugged was set to false) is waiting for all operations after
> > the drm_dev_unplug() call to be done, otherwise we might return from
> > platform_driver->remove() before the unplug cleanups are done, and
> > there might still be threads/workqueues accessing device resources
> > while/after they get released by the device-model.
> 
> I can't figure out how to do this other than adding a new atomic status
> bit into panthor. So something like:
> 
> 	if (!drm_dev_enter())
> 		/* Already unplugged */
> 		return;
> 
> 	if (atomic_cmpxchg(&unplugging, false, true)) {
> 		/* Racing with another thread */
> 		drm_dev_exit();
> 		/* Wait for other threads to exit */
> 		synchronize_srcu(&drm_unplug_srcu);
> 		return;
> 	}
> 
> 	panthor_xxx_unplug()
> 
> 	drm_dev_exit();
> 
> Or at least I think that might work. The need to synchronize with
> drm_unplug_srcu means this really needs a new helper in drm_drv.c.
> 
> >>
> >>> +	if (drm_dev_is_unplugged(&ptdev->base))
> >>> +		return;
> >>> +
> >>> +	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
> >>> +
> >>> +	/* Call drm_dev_unplug() so any access to HW block happening after
> >>> +	 * that point get rejected.
> >>> +	 */
> >>> +	drm_dev_unplug(&ptdev->base);
> >>> +
> >>> +	/* Now, try to cleanly shutdown the GPU before the device resources
> >>> +	 * get reclaimed.
> >>> +	 */
> >>> +	panthor_sched_unplug(ptdev);
> >>> +	panthor_fw_unplug(ptdev);
> >>> +	panthor_mmu_unplug(ptdev);
> >>> +	panthor_gpu_unplug(ptdev);
> >>> +
> >>> +	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
> >>> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> >>> +}
> >>> +
> >>> +static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
> >>> +{
> >>> +	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
> >>> +
> >>> +	cancel_work_sync(&ptdev->reset.work);
> >>> +	destroy_workqueue(ptdev->reset.wq);
> >>> +}
> >>> +
> >>> +static void panthor_device_reset_work(struct work_struct *work)
> >>> +{
> >>> +	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
> >>> +	int ret, cookie;
> >>> +
> >>> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> >>> +		return;
> >>> +
> >>> +	panthor_sched_pre_reset(ptdev);
> >>> +	panthor_fw_pre_reset(ptdev, true);
> >>> +	panthor_mmu_pre_reset(ptdev);
> >>> +	panthor_gpu_soft_reset(ptdev);
> >>> +	panthor_gpu_l2_power_on(ptdev);
> >>> +	panthor_mmu_post_reset(ptdev);
> >>> +	ret = panthor_fw_post_reset(ptdev);
> >>> +	if (ret)
> >>> +		goto out;
> >>> +
> >>> +	atomic_set(&ptdev->reset.pending, 0);
> >>> +	panthor_sched_post_reset(ptdev);
> >>> +	drm_dev_exit(cookie);
> >>> +
> >>> +out:
> >>> +	if (ret) {  
> >>
> >> This looks like a race condition too - is there a need for a
> >> drm_dev_exit_and_unplug() function?
> > 
> > drm_dev_exit() is just releasing the read-lock. drm_dev_unplug()
> > waits for all readers to be done and sets the unplugged value to true.
> > So we only get readers/writer synchronization here, but nothing doing
> > writer/writer sync. I guess the drm core leaves that to drivers, given
> > drm_dev_unplug() is usually called from xxx_driver->remove() hook, on
> > which serialization is guaranteed by the device-model.
> > 
> > TLDR; yes, it's racy, but I don't think drm_dev_exit_and_unplug() would
> > help solve the existing race.
> 
> Yeah, I hadn't really thought through the reader/writer locks.
> 
> > It's worth noting that we currently have only 2 paths calling
> > panthor_device_unplug(): the platform_driver->remove() hook and the
> > reset worker. Calling drm_dev_unplug() might not be the right thing to
> > do, I just thought it was a good match to reflect the fact the device
> > becomes inaccessible, without adding yet another kind of device-lost
> > field.
> 
> I quite liked the unplugged approach, it hides the complexities of the
> GPU breaking nicely.
> 
> However I do think this path needs fixing in some way, because of the
> "goto out" we end up calling panthor_device_unplug() while in the
> drm_dev_enter() section. Which, unless I'm mistaken, means
> panthor_device_unplug() will call drm_dev_unplug() in that section -
> which should produce a lockdep warning at the very least, if not an
> actual deadlock.
> 
> Given it's only a read lock - I think simply moving drm_dev_exit() below
> the "out:" label fixes the deadlock without making any races worse.
> Whether the race here actually matters I'm not sure.
> 
> >>
> >>> +		panthor_device_unplug(ptdev);
> >>> +		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
> >>> +	}
> >>> +}
> >>> +
> >>> +static bool panthor_device_is_initialized(struct panthor_device *ptdev)
> >>> +{
> >>> +	return !!ptdev->scheduler;
> >>> +}
> >>> +
> >>> +static void panthor_device_free_page(struct drm_device *ddev, void *data)
> >>> +{
> >>> +	free_page((unsigned long)data);
> >>> +}
> >>> +
> >>> +int panthor_device_init(struct panthor_device *ptdev)
> >>> +{
> >>> +	struct resource *res;
> >>> +	struct page *p;
> >>> +	int ret;
> >>> +
> >>> +	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
> >>> +
> >>> +	drmm_mutex_init(&ptdev->base, &ptdev->pm.lock);
> >>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
> >>> +	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
> >>> +	if (!p)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	ptdev->pm.dummy_latest_flush = page_address(p);
> >>> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
> >>> +				       ptdev->pm.dummy_latest_flush);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	/* Set the dummy page to the default LATEST_FLUSH value. This
> >>> +	 * will be updated on the next suspend.
> >>> +	 */
> >>> +	*ptdev->pm.dummy_latest_flush = CSF_GPU_LATEST_FLUSH_ID_DEFAULT;  
> >>
> >> I see why this register default value was defined. Although I'm not sure
> >> it has any benefit over just using zero... If the GPU is off when user
> >> space reads the FLUSH_ID then the GPU's caches are definitely empty so
> >> any flush ID is valid.
> > 
> > Zero means we'll force a cache flush for all CS that were created while
> > the device was suspended, that's not ideal.
> > 
> >>
> >> Interestingly looking at kbase it seems to use an initial value of 1
> >> (POWER_DOWN_LATEST_FLUSH_VALUE). I guess zero is less ideal because
> >> FLUSH_CACHE2 would then unconditionally do a flush.
> > 
> > I guess a value of 1 would work. It just means we'll get a spurious
> > flush if the CS is submitted after 32 flushes happened, on the other
> > hand we also a spurious flush on the first submitted CS when we use
> > POWER_DOWN_LATEST_FLUSH_VALUE. I'll switch to 1, drop the default def,
> > and update the comment accordingly.
> 
> Yeah, matching kbase is almost certainly the safest approach ;) Sorry, I
> was reviewing the patches mostly in order and this looked really odd
> until I started digging into it. Zero is clearly not the ideal value,
> but the reset value is also just a weird value for hardware validation
> (it enables easier checking of the wrap condition). Since kbase picks 1,
> that must be a value which works well!
> 
> >>
> >>> +
> >>> +	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
> >>> +	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
> >>> +	if (!ptdev->reset.wq)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	ret = panthor_clk_init(ptdev);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	ret = panthor_devfreq_init(ptdev);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
> >>> +							      0, &res);
> >>> +	if (IS_ERR(ptdev->iomem))
> >>> +		return PTR_ERR(ptdev->iomem);
> >>> +
> >>> +	ptdev->phys_addr = res->start;
> >>> +
> >>> +	ret = devm_pm_runtime_enable(ptdev->base.dev);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	ret = pm_runtime_resume_and_get(ptdev->base.dev);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	ret = panthor_gpu_init(ptdev);
> >>> +	if (ret)
> >>> +		goto err_rpm_put;
> >>> +
> >>> +	ret = panthor_mmu_init(ptdev);
> >>> +	if (ret)
> >>> +		goto err_rpm_put;
> >>> +
> >>> +	ret = panthor_fw_init(ptdev);
> >>> +	if (ret)
> >>> +		goto err_rpm_put;
> >>> +
> >>> +	ret = panthor_sched_init(ptdev);
> >>> +	if (ret)
> >>> +		goto err_rpm_put;
> >>> +
> >>> +	/* ~3 frames */
> >>> +	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
> >>> +	pm_runtime_use_autosuspend(ptdev->base.dev);
> >>> +	pm_runtime_put_autosuspend(ptdev->base.dev);
> >>> +	return 0;
> >>> +
> >>> +err_rpm_put:
> >>> +	pm_runtime_put_sync_suspend(ptdev->base.dev);
> >>> +	return ret;
> >>> +}
> >>> +
> >>> +#define PANTHOR_EXCEPTION(id) \
> >>> +	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
> >>> +		.name = #id, \
> >>> +	}
> >>> +
> >>> +struct panthor_exception_info {
> >>> +	const char *name;
> >>> +};
> >>> +
> >>> +static const struct panthor_exception_info panthor_exception_infos[] = {
> >>> +	PANTHOR_EXCEPTION(OK),
> >>> +	PANTHOR_EXCEPTION(TERMINATED),
> >>> +	PANTHOR_EXCEPTION(KABOOM),
> >>> +	PANTHOR_EXCEPTION(EUREKA),
> >>> +	PANTHOR_EXCEPTION(ACTIVE),
> >>> +	PANTHOR_EXCEPTION(CS_RES_TERM),
> >>> +	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
> >>> +	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
> >>> +	PANTHOR_EXCEPTION(CS_BUS_FAULT),
> >>> +	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
> >>> +	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
> >>> +	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
> >>> +	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
> >>> +	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
> >>> +	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
> >>> +	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
> >>> +	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
> >>> +	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
> >>> +	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
> >>> +	PANTHOR_EXCEPTION(OOM),
> >>> +	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
> >>> +	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
> >>> +	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
> >>> +	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
> >>> +	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
> >>> +	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
> >>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
> >>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
> >>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
> >>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
> >>> +	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
> >>> +	PANTHOR_EXCEPTION(PERM_FAULT_0),
> >>> +	PANTHOR_EXCEPTION(PERM_FAULT_1),
> >>> +	PANTHOR_EXCEPTION(PERM_FAULT_2),
> >>> +	PANTHOR_EXCEPTION(PERM_FAULT_3),
> >>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
> >>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
> >>> +	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
> >>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
> >>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
> >>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
> >>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
> >>> +	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
> >>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
> >>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
> >>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
> >>> +	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
> >>> +};
> >>> +
> >>> +const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
> >>> +{
> >>> +	if (drm_WARN_ON(&ptdev->base,  
> >>
> >> I'm not convinced this should be a WARN_ON as I suspect it's probably
> >> possible to inject values from user space (although I'm not completely
> >> sure on that).
> > 
> > Normally no (it's something returned by the FW), unless userspace gets
> > access to the kernel <-> FW interface, which would be worrisome :-).
> 
> I've no idea if it's actually possible, but it feels like it should be
> possible to create a firmware synchronisation object with an error code
> chosen by the user and possibly then propagate that error code back to
> the kernel. It's certainly not trivial though. Either way the WARN is
> unnecessary.
> 
> >> It's certainly not a driver error as such if we can't
> >> decode the value.
> > 
> > Ack on dropping the WARN_ON().
> > 
> >>
> >>> +			exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
> >>> +			!panthor_exception_infos[exception_code].name))
> >>> +		return "Unknown exception type";
> >>> +
> >>> +	return panthor_exception_infos[exception_code].name;
> >>> +}
> >>> +
> >>> +static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
> >>> +{
> >>> +	struct vm_area_struct *vma = vmf->vma;
> >>> +	struct panthor_device *ptdev = vma->vm_private_data;
> >>> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> >>> +	unsigned long pfn;
> >>> +	pgprot_t pgprot;
> >>> +	vm_fault_t ret;
> >>> +	bool active;
> >>> +	int cookie;
> >>> +
> >>> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> >>> +		return VM_FAULT_SIGBUS;
> >>> +
> >>> +	mutex_lock(&ptdev->pm.lock);
> >>> +	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
> >>> +
> >>> +	switch (id) {
> >>> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> >>> +		if (active)
> >>> +			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
> >>> +		else
> >>> +			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		ret = VM_FAULT_SIGBUS;
> >>> +		goto out_unlock;
> >>> +	}
> >>> +
> >>> +	pgprot = vma->vm_page_prot;
> >>> +	if (active)
> >>> +		pgprot = pgprot_noncached(pgprot);
> >>> +
> >>> +	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
> >>> +
> >>> +out_unlock:
> >>> +	mutex_unlock(&ptdev->pm.lock);
> >>> +	drm_dev_exit(cookie);
> >>> +	return ret;
> >>> +}
> >>> +
> >>> +static const struct vm_operations_struct panthor_mmio_vm_ops = {
> >>> +	.fault = panthor_mmio_vm_fault,
> >>> +};
> >>> +
> >>> +int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
> >>> +{
> >>> +	u64 id = vma->vm_pgoff << PAGE_SHIFT;
> >>> +
> >>> +	switch (id) {
> >>> +	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
> >>> +		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
> >>> +		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
> >>> +			return -EINVAL;
> >>> +
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	/* Defer actual mapping to the fault handler. */
> >>> +	vma->vm_private_data = ptdev;
> >>> +	vma->vm_ops = &panthor_mmio_vm_ops;
> >>> +	vm_flags_set(vma,
> >>> +		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
> >>> +		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +#ifdef CONFIG_PM
> >>> +int panthor_device_resume(struct device *dev)
> >>> +{
> >>> +	struct panthor_device *ptdev = dev_get_drvdata(dev);
> >>> +	int ret, cookie;
> >>> +
> >>> +	mutex_lock(&ptdev->pm.lock);
> >>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
> >>> +
> >>> +	ret = clk_prepare_enable(ptdev->clks.core);
> >>> +	if (ret)
> >>> +		goto err_unlock;
> >>> +
> >>> +	ret = clk_prepare_enable(ptdev->clks.stacks);
> >>> +	if (ret)
> >>> +		goto err_disable_core_clk;
> >>> +
> >>> +	ret = clk_prepare_enable(ptdev->clks.coregroup);
> >>> +	if (ret)
> >>> +		goto err_disable_stacks_clk;
> >>> +
> >>> +	ret = panthor_devfreq_resume(ptdev);
> >>> +	if (ret)
> >>> +		goto err_disable_coregroup_clk;
> >>> +
> >>> +	if (panthor_device_is_initialized(ptdev) &&
> >>> +	    drm_dev_enter(&ptdev->base, &cookie)) {
> >>> +		panthor_gpu_resume(ptdev);
> >>> +		panthor_mmu_resume(ptdev);
> >>> +		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
> >>> +		if (!ret)
> >>> +			panthor_sched_resume(ptdev);
> >>> +
> >>> +		drm_dev_exit(cookie);
> >>> +
> >>> +		if (ret)
> >>> +			goto err_devfreq_suspend;
> >>> +	}
> >>> +
> >>> +	/* Clear all IOMEM mappings pointing to this device after we've
> >>> +	 * resumed. This way the fake mappings pointing to the dummy pages
> >>> +	 * are removed and the real iomem mapping will be restored on next
> >>> +	 * access.
> >>> +	 */
> >>> +	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
> >>> +			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> >>> +	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);  
> >>
> >> Is the ordering here correct? I think we need to set ACTIVE before the
> >> unmap_mapping_range otherwise there is a (very small) race where user
> >> space could fault the page (and get the dummy mapping) before the
> >> atomic_set.
> > 
> > We take the pm.lock in panthor_mmio_vm_fault().
> > 
> >>
> >> Hmm, actually we have the pm.lock, so no this isn't racy. In which case
> >> is there a good reason that you're using atomics? I can see two accesses
> >> which aren't protected by pm.lock:
> >>
> >>   * the early out in panthor_device_suspend() - which could easily be
> >> moved inside the lock.
> > 
> > When we're in suspend() we are the one in control of the pm.state, so
> > no race expected here.
> > 
> >>
> >>   * panthor_device_schedule_reset() - this looks racy (the power down
> >> could happen immediately after the atomic_read()), so I suspect it would
> >> be better moving the check into panthor_device_reset_work() and
> >> performing it with the pm.lock held.
> > 
> > I think the main reason for it being an atomic is because I didn't
> > have PM locking in the initial implementation, but I ended adding
> > locking at some point because I didn't really have choice. I thought
> > the race didn't exist because of the workqueue synchronization/work
> > cancellation that happens in panthor_sched_suspend(), but I see now
> > that it's not protecting us (thread queuing the job could be paused
> > just after checking the PM state and resumed after the suspend
> > happened). This being said, we might have a lock ordering issue if we
> > take the PM lock in that path (I need to check that).
> 
> Yeah I didn't bother to check whether it would create ordering issues...
> ;) I'll leave you to figure out the fix - the whole atomic + mutex was
> confusing and doesn't seem to have quite worked.
> 
> [...]
> 
> >>> +
> >>> +/**
> >>> + * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
> >>> + * registration function.
> >>> + *
> >>> + * The boiler-plate to gracefully deal with shared interrupts is
> >>> + * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
> >>> + * just after you actual handler. The handler prototype is:  
> >> s/you/your/ or probably s/you/the/ since we don't expect people to be
> >> adding more ;)
> >>
> >>> + *
> >>> + * void (*handler)(struct panthor_device *, u32 status);
> >>> + */
> >>> +#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
> >>> +static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
> >>> +{												\
> >>> +	struct panthor_irq *pirq = data;							\
> >>> +	struct panthor_device *ptdev = pirq->ptdev;						\  
> >>
> >> Maybe I'm missing something, but I was expecting a check here for if the
> >> irq has been suspended and to avoid the register reads if it was.
> > 
> > Thought the INT_MASK=0 + synchronize_irq() in panthor_xxx_irq_suspend()
> > would guarantee that the handler can't be called after
> > panthor_xxx_irq_suspend() was called.
> 
> If the IRQ is shared then Linux doesn't know which device caused the
> interrupt, so another device's (shared) interrupt could cause our
> handler to be run.
> 
> >> Otherwise I'm not entirely sure I follow what all this code is for.
> > 
> > Not entirely sure which code we're talking about. The reason we
> > don't use the default raw IRQ handler is because it doesn't work if the
> > irq line is shared. In that case, we need to mask all interrupts to
> > make sure other handlers on the same irq line don't get spammed with
> > our IRQs.
> 
> What I'm not following is why we need all this extra infrastructure for
> IRQs. The 'setting the mask to 0' during suspend is simple enough and
> could be included in code which now calls panthor_xxx_irq_suspend()
> (equally for restoring the mask on resume). But there's a loads more
> code here.
> 
> My initial thought when I looked at this was that you were trying to
> solve the issue of a shared IRQ where Mali might get powered off, but
> the IRQ is then triggered by another device. In that case touching the
> Mali registers would be problematic, so I was expecting some code in
> _irq_raw_handler() to check whether the IRQ couldn't possibly be for us
> (i.e. mask==0) and early out with IRQ_NONE. kbase has a concept like
> this "gpu_powered" for exactly this reason.

There is also support for Juno setups where all the GPU IRQs are muxed into
a single interrupt out of the FPGAs, so you need to share the line between
blocks. I have initially suggested the generic approach to Boris over a
freedesktop MR, but even that one did not handle the case where you would
share the interrupt with another device (who would want to do that anyway,
right? :) )

Best regards,
Liviu

> 
> But I can't see anything in the code to handle that case. And the
> "spamming" of other drivers during suspend shouldn't really happen
> (there's something odd going on if the hardware is generating interrupts
> when it's meant to be suspended).
> 
> But maybe I'm just missing something - it's a while since I've dealt
> with interrupt code in Linux.
> 
> Steve
> 
> >>
> >> Steve
> >>
> >>> +												\
> >>> +	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
> >>> +		return IRQ_NONE;								\
> >>> +												\
> >>> +	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
> >>> +	return IRQ_WAKE_THREAD;									\
> >>> +}												\
> >>> +												\
> >>> +static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
> >>> +{												\
> >>> +	struct panthor_irq *pirq = data;							\
> >>> +	struct panthor_device *ptdev = pirq->ptdev;						\
> >>> +	irqreturn_t ret = IRQ_NONE;								\
> >>> +												\
> >>> +	while (true) {										\
> >>> +		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
> >>> +												\
> >>> +		if (!status)									\
> >>> +			break;									\
> >>> +												\
> >>> +		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> >>> +												\
> >>> +		__handler(ptdev, status);							\
> >>> +		ret = IRQ_HANDLED;								\
> >>> +	}											\
> >>> +												\
> >>> +	if (!atomic_read(&pirq->suspended))							\
> >>> +		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
> >>> +												\
> >>> +	return ret;										\
> >>> +}												\
> >>> +												\
> >>> +static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
> >>> +{												\
> >>> +	int cookie;										\
> >>> +												\
> >>> +	atomic_set(&pirq->suspended, true);							\
> >>> +												\
> >>> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> >>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
> >>> +		synchronize_irq(pirq->irq);							\
> >>> +		drm_dev_exit(cookie);								\
> >>> +	}											\
> >>> +												\
> >>> +	pirq->mask = 0;										\
> >>> +}												\
> >>> +												\
> >>> +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
> >>> +{												\
> >>> +	int cookie;										\
> >>> +												\
> >>> +	atomic_set(&pirq->suspended, false);							\
> >>> +	pirq->mask = mask;									\
> >>> +												\
> >>> +	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
> >>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
> >>> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
> >>> +		drm_dev_exit(cookie);								\
> >>> +	}											\
> >>> +}												\
> >>> +												\
> >>> +static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
> >>> +					      struct panthor_irq *pirq,				\
> >>> +					      int irq, u32 mask)				\
> >>> +{												\
> >>> +	pirq->ptdev = ptdev;									\
> >>> +	pirq->irq = irq;									\
> >>> +	panthor_ ## __name ## _irq_resume(pirq, mask);						\
> >>> +												\
> >>> +	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
> >>> +					 panthor_ ## __name ## _irq_raw_handler,		\
> >>> +					 panthor_ ## __name ## _irq_threaded_handler,		\
> >>> +					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
> >>> +					 pirq);							\
> >>> +}
> >>> +
> >>> +extern struct workqueue_struct *panthor_cleanup_wq;
> >>> +
> >>> +#endif  
> >>
> > 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
new file mode 100644
index 000000000000..15f102116fa0
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -0,0 +1,479 @@ 
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/clk.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include "panthor_sched.h"
+#include "panthor_device.h"
+#include "panthor_devfreq.h"
+#include "panthor_gpu.h"
+#include "panthor_fw.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+
+static int panthor_clk_init(struct panthor_device *ptdev)
+{
+	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
+	if (IS_ERR(ptdev->clks.core)) {
+		drm_err(&ptdev->base, "get 'core' clock failed %ld\n",
+			PTR_ERR(ptdev->clks.core));
+		return PTR_ERR(ptdev->clks.core);
+	}
+
+	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
+	if (IS_ERR(ptdev->clks.stacks)) {
+		drm_err(&ptdev->base, "get 'stacks' clock failed %ld\n",
+			PTR_ERR(ptdev->clks.stacks));
+		return PTR_ERR(ptdev->clks.stacks);
+	}
+
+	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
+	if (IS_ERR(ptdev->clks.coregroup)) {
+		drm_err(&ptdev->base, "get 'coregroup' clock failed %ld\n",
+			PTR_ERR(ptdev->clks.coregroup));
+		return PTR_ERR(ptdev->clks.coregroup);
+	}
+
+	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
+	return 0;
+}
+
+void panthor_device_unplug(struct panthor_device *ptdev)
+{
+	/* FIXME: This is racy. */
+	if (drm_dev_is_unplugged(&ptdev->base))
+		return;
+
+	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
+
+	/* Call drm_dev_unplug() so any access to HW block happening after
+	 * that point get rejected.
+	 */
+	drm_dev_unplug(&ptdev->base);
+
+	/* Now, try to cleanly shutdown the GPU before the device resources
+	 * get reclaimed.
+	 */
+	panthor_sched_unplug(ptdev);
+	panthor_fw_unplug(ptdev);
+	panthor_mmu_unplug(ptdev);
+	panthor_gpu_unplug(ptdev);
+
+	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
+	pm_runtime_put_sync_suspend(ptdev->base.dev);
+}
+
+static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+
+	cancel_work_sync(&ptdev->reset.work);
+	destroy_workqueue(ptdev->reset.wq);
+}
+
+static void panthor_device_reset_work(struct work_struct *work)
+{
+	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
+	int ret, cookie;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return;
+
+	panthor_sched_pre_reset(ptdev);
+	panthor_fw_pre_reset(ptdev, true);
+	panthor_mmu_pre_reset(ptdev);
+	panthor_gpu_soft_reset(ptdev);
+	panthor_gpu_l2_power_on(ptdev);
+	panthor_mmu_post_reset(ptdev);
+	ret = panthor_fw_post_reset(ptdev);
+	if (ret)
+		goto out;
+
+	atomic_set(&ptdev->reset.pending, 0);
+	panthor_sched_post_reset(ptdev);
+	drm_dev_exit(cookie);
+
+out:
+	if (ret) {
+		panthor_device_unplug(ptdev);
+		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
+	}
+}
+
+static bool panthor_device_is_initialized(struct panthor_device *ptdev)
+{
+	return !!ptdev->scheduler;
+}
+
+static void panthor_device_free_page(struct drm_device *ddev, void *data)
+{
+	free_page((unsigned long)data);
+}
+
+int panthor_device_init(struct panthor_device *ptdev)
+{
+	struct resource *res;
+	struct page *p;
+	int ret;
+
+	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
+
+	drmm_mutex_init(&ptdev->base, &ptdev->pm.lock);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	ptdev->pm.dummy_latest_flush = page_address(p);
+	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
+				       ptdev->pm.dummy_latest_flush);
+	if (ret)
+		return ret;
+
+	/* Set the dummy page to the default LATEST_FLUSH value. This
+	 * will be updated on the next suspend.
+	 */
+	*ptdev->pm.dummy_latest_flush = CSF_GPU_LATEST_FLUSH_ID_DEFAULT;
+
+	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
+	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
+	if (!ptdev->reset.wq)
+		return -ENOMEM;
+
+	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
+	if (ret)
+		return ret;
+
+	ret = panthor_clk_init(ptdev);
+	if (ret)
+		return ret;
+
+	ret = panthor_devfreq_init(ptdev);
+	if (ret)
+		return ret;
+
+	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
+							      0, &res);
+	if (IS_ERR(ptdev->iomem))
+		return PTR_ERR(ptdev->iomem);
+
+	ptdev->phys_addr = res->start;
+
+	ret = devm_pm_runtime_enable(ptdev->base.dev);
+	if (ret)
+		return ret;
+
+	ret = pm_runtime_resume_and_get(ptdev->base.dev);
+	if (ret)
+		return ret;
+
+	ret = panthor_gpu_init(ptdev);
+	if (ret)
+		goto err_rpm_put;
+
+	ret = panthor_mmu_init(ptdev);
+	if (ret)
+		goto err_rpm_put;
+
+	ret = panthor_fw_init(ptdev);
+	if (ret)
+		goto err_rpm_put;
+
+	ret = panthor_sched_init(ptdev);
+	if (ret)
+		goto err_rpm_put;
+
+	/* ~3 frames */
+	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
+	pm_runtime_use_autosuspend(ptdev->base.dev);
+	pm_runtime_put_autosuspend(ptdev->base.dev);
+	return 0;
+
+err_rpm_put:
+	pm_runtime_put_sync_suspend(ptdev->base.dev);
+	return ret;
+}
+
+#define PANTHOR_EXCEPTION(id) \
+	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
+		.name = #id, \
+	}
+
+struct panthor_exception_info {
+	const char *name;
+};
+
+static const struct panthor_exception_info panthor_exception_infos[] = {
+	PANTHOR_EXCEPTION(OK),
+	PANTHOR_EXCEPTION(TERMINATED),
+	PANTHOR_EXCEPTION(KABOOM),
+	PANTHOR_EXCEPTION(EUREKA),
+	PANTHOR_EXCEPTION(ACTIVE),
+	PANTHOR_EXCEPTION(CS_RES_TERM),
+	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
+	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
+	PANTHOR_EXCEPTION(CS_BUS_FAULT),
+	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
+	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
+	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
+	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
+	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
+	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
+	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
+	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
+	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
+	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
+	PANTHOR_EXCEPTION(OOM),
+	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
+	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
+	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
+	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
+	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
+	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
+	PANTHOR_EXCEPTION(PERM_FAULT_0),
+	PANTHOR_EXCEPTION(PERM_FAULT_1),
+	PANTHOR_EXCEPTION(PERM_FAULT_2),
+	PANTHOR_EXCEPTION(PERM_FAULT_3),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
+};
+
+const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
+{
+	if (drm_WARN_ON(&ptdev->base,
+			exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
+			!panthor_exception_infos[exception_code].name))
+		return "Unknown exception type";
+
+	return panthor_exception_infos[exception_code].name;
+}
+
+static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct panthor_device *ptdev = vma->vm_private_data;
+	u64 id = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long pfn;
+	pgprot_t pgprot;
+	vm_fault_t ret;
+	bool active;
+	int cookie;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return VM_FAULT_SIGBUS;
+
+	mutex_lock(&ptdev->pm.lock);
+	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
+
+	switch (id) {
+	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
+		if (active)
+			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
+		else
+			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
+		break;
+
+	default:
+		ret = VM_FAULT_SIGBUS;
+		goto out_unlock;
+	}
+
+	pgprot = vma->vm_page_prot;
+	if (active)
+		pgprot = pgprot_noncached(pgprot);
+
+	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
+
+out_unlock:
+	mutex_unlock(&ptdev->pm.lock);
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static const struct vm_operations_struct panthor_mmio_vm_ops = {
+	.fault = panthor_mmio_vm_fault,
+};
+
+int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
+{
+	u64 id = vma->vm_pgoff << PAGE_SHIFT;
+
+	switch (id) {
+	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
+		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
+		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
+			return -EINVAL;
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Defer actual mapping to the fault handler. */
+	vma->vm_private_data = ptdev;
+	vma->vm_ops = &panthor_mmio_vm_ops;
+	vm_flags_set(vma,
+		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
+		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+int panthor_device_resume(struct device *dev)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	int ret, cookie;
+
+	mutex_lock(&ptdev->pm.lock);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
+
+	ret = clk_prepare_enable(ptdev->clks.core);
+	if (ret)
+		goto err_unlock;
+
+	ret = clk_prepare_enable(ptdev->clks.stacks);
+	if (ret)
+		goto err_disable_core_clk;
+
+	ret = clk_prepare_enable(ptdev->clks.coregroup);
+	if (ret)
+		goto err_disable_stacks_clk;
+
+	ret = panthor_devfreq_resume(ptdev);
+	if (ret)
+		goto err_disable_coregroup_clk;
+
+	if (panthor_device_is_initialized(ptdev) &&
+	    drm_dev_enter(&ptdev->base, &cookie)) {
+		panthor_gpu_resume(ptdev);
+		panthor_mmu_resume(ptdev);
+		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
+		if (!ret)
+			panthor_sched_resume(ptdev);
+
+		drm_dev_exit(cookie);
+
+		if (ret)
+			goto err_devfreq_suspend;
+	}
+
+	/* Clear all IOMEM mappings pointing to this device after we've
+	 * resumed. This way the fake mappings pointing to the dummy pages
+	 * are removed and the real iomem mapping will be restored on next
+	 * access.
+	 */
+	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
+			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
+	if (atomic_read(&ptdev->reset.pending))
+		queue_work(ptdev->reset.wq, &ptdev->reset.work);
+
+	mutex_unlock(&ptdev->pm.lock);
+	return 0;
+
+err_devfreq_suspend:
+	panthor_devfreq_suspend(ptdev);
+
+err_disable_coregroup_clk:
+	clk_disable_unprepare(ptdev->clks.coregroup);
+
+err_disable_stacks_clk:
+	clk_disable_unprepare(ptdev->clks.stacks);
+
+err_disable_core_clk:
+	clk_disable_unprepare(ptdev->clks.core);
+
+err_unlock:
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+	mutex_unlock(&ptdev->pm.lock);
+	return ret;
+}
+
+int panthor_device_suspend(struct device *dev)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	int ret, cookie;
+
+	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
+		return 0;
+
+	mutex_lock(&ptdev->pm.lock);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING);
+
+	/* Clear all IOMEM mappings pointing to this device before we
+	 * shutdown the power-domain and clocks. Failing to do that results
+	 * in external aborts when the process accesses the iomem region.
+	 */
+	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
+			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
+
+	if (panthor_device_is_initialized(ptdev) &&
+	    drm_dev_enter(&ptdev->base, &cookie)) {
+		cancel_work_sync(&ptdev->reset.work);
+
+		/* We prepare everything as if we were resetting the GPU.
+		 * The end of the reset will happen in the resume path though.
+		 */
+		panthor_sched_suspend(ptdev);
+		panthor_fw_suspend(ptdev);
+		panthor_mmu_suspend(ptdev);
+		panthor_gpu_suspend(ptdev);
+		drm_dev_exit(cookie);
+	}
+
+	ret = panthor_devfreq_suspend(ptdev);
+	if (ret) {
+		if (panthor_device_is_initialized(ptdev) &&
+		    drm_dev_enter(&ptdev->base, &cookie)) {
+			panthor_gpu_resume(ptdev);
+			panthor_mmu_resume(ptdev);
+			drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
+			panthor_sched_resume(ptdev);
+			drm_dev_exit(cookie);
+		}
+
+		atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
+		goto out_unlock;
+	}
+
+	/* Before we suspend, update the dummy_latest_flush page, so accesses
+	 * to this dummy page return the value the HW would have returned.
+	 */
+	*ptdev->pm.dummy_latest_flush = gpu_read(ptdev, CSF_GPU_LATEST_FLUSH_ID);
+
+	clk_disable_unprepare(ptdev->clks.coregroup);
+	clk_disable_unprepare(ptdev->clks.stacks);
+	clk_disable_unprepare(ptdev->clks.core);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+
+out_unlock:
+	mutex_unlock(&ptdev->pm.lock);
+	return ret;
+}
+#endif
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
new file mode 100644
index 000000000000..e0e1be263eb9
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -0,0 +1,354 @@ 
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_DEVICE_H__
+#define __PANTHOR_DEVICE_H__
+
+#include <linux/atomic.h>
+#include <linux/io-pgtable.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spinlock.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mm.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+struct panthor_csf;
+struct panthor_csf_ctx;
+struct panthor_device;
+struct panthor_gpu;
+struct panthor_group_pool;
+struct panthor_heap_pool;
+struct panthor_job;
+struct panthor_mmu;
+struct panthor_fw;
+struct panthor_perfcnt;
+struct panthor_vm;
+struct panthor_vm_pool;
+
+/**
+ * enum panthor_device_pm_state - PM state
+ */
+enum panthor_device_pm_state {
+	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
+	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,
+
+	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
+	PANTHOR_DEVICE_PM_STATE_RESUMING,
+
+	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
+	PANTHOR_DEVICE_PM_STATE_ACTIVE,
+
+	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
+	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
+};
+
+/**
+ * struct panthor_irq - IRQ data
+ *
+ * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
+ */
+struct panthor_irq {
+	/** @ptdev: Panthor device */
+	struct panthor_device *ptdev;
+
+	/** @irq: IRQ number. */
+	int irq;
+
+	/** @mask: Current mask being applied to xxx_INT_MASK. */
+	u32 mask;
+
+	/** @suspended: Set to true when the IRQ is suspended. */
+	atomic_t suspended;
+};
+
+/**
+ * struct panthor_device - Panthor device
+ */
+struct panthor_device {
+	/** @base: Base drm_device. */
+	struct drm_device base;
+
+	/** @phys_addr: Physical address of the iomem region. */
+	phys_addr_t phys_addr;
+
+	/** @iomem: CPU mapping of the IOMEM region. */
+	void __iomem *iomem;
+
+	/** @clks: GPU clocks. */
+	struct {
+		/** @core: Core clock. */
+		struct clk *core;
+
+		/** @stacks: Stacks clock. This clock is optional. */
+		struct clk *stacks;
+
+		/** @coregroup: Core group clock. This clock is optional. */
+		struct clk *coregroup;
+	} clks;
+
+	/** @coherent: True if the CPU/GPU are memory coherent. */
+	bool coherent;
+
+	/** @gpu_info: GPU information. */
+	struct drm_panthor_gpu_info gpu_info;
+
+	/** @csif_info: Command stream interface information. */
+	struct drm_panthor_csif_info csif_info;
+
+	/** @gpu: GPU management data. */
+	struct panthor_gpu *gpu;
+
+	/** @fw: FW management data. */
+	struct panthor_fw *fw;
+
+	/** @mmu: MMU management data. */
+	struct panthor_mmu *mmu;
+
+	/** @scheduler: Scheduler management data. */
+	struct panthor_scheduler *scheduler;
+
+	/** @devfreq: Device frequency scaling management data. */
+	struct panthor_devfreq *devfreq;
+
+	/** @reset: Reset related fields. */
+	struct {
+		/** @wq: Ordered worqueud used to schedule reset operations. */
+		struct workqueue_struct *wq;
+
+		/** @work: Reset work. */
+		struct work_struct work;
+
+		/** @pending: Set to true if a reset is pending. */
+		atomic_t pending;
+	} reset;
+
+	/** @pm: Power management related data. */
+	struct {
+		/** @state: Power state, see panthor_device_pm_state. */
+		atomic_t state;
+
+		/**
+		 * @lock: Lock protecting the suspend/resume operations.
+		 *
+		 * This is needed to ensure we map the dummy IO pages when
+		 * the device is being suspended, and the real IO pages when
+		 * the device is being resumed. We can't just do with the
+		 * state atomicity to deal with this race.
+		 */
+		struct mutex lock;
+
+		/**
+		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
+		 *
+		 * Used to replace the real LATEST_FLUSH page when the GPU
+		 * is suspended.
+		 */
+		u32 *dummy_latest_flush;
+	} pm;
+};
+
+/**
+ * struct panthor_file - Panthor file
+ */
+struct panthor_file {
+	/** @ptdev: Device attached to this file. */
+	struct panthor_device *ptdev;
+
+	/** @vms: VM pool attached to this file. */
+	struct panthor_vm_pool *vms;
+
+	/** @groups: Scheduling group pool attached to this file. */
+	struct panthor_group_pool *groups;
+};
+
+int panthor_device_init(struct panthor_device *ptdev);
+void panthor_device_unplug(struct panthor_device *ptdev);
+
+/**
+ * panthor_device_schedule_reset() - Schedules a reset operation
+ */
+static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
+{
+	if (atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE &&
+	    !atomic_cmpxchg(&ptdev->reset.pending, 0, 1))
+		queue_work(ptdev->reset.wq, &ptdev->reset.work);
+}
+
+/**
+ * panthor_device_reset_is_pending() - Checks if a reset is pending.
+ *
+ * Return: true if a reset is pending, false otherwise.
+ */
+static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
+{
+	return atomic_read(&ptdev->reset.pending) != 0;
+}
+
+int panthor_device_mmap_io(struct panthor_device *ptdev,
+			   struct vm_area_struct *vma);
+
+int panthor_device_resume(struct device *dev);
+int panthor_device_suspend(struct device *dev);
+
+enum drm_panthor_exception_type {
+	DRM_PANTHOR_EXCEPTION_OK = 0x00,
+	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
+	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
+	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
+	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
+	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
+	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
+	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
+	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
+	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
+	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
+	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
+	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
+	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
+	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
+	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
+	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
+	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
+	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
+	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
+	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
+	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
+	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
+	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
+	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
+	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
+	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
+};
+
+/**
+ * panthor_exception_is_fault() - Checks if an exception is a fault.
+ *
+ * Return: true if the exception is a fault, false otherwise.
+ */
+static inline bool
+panthor_exception_is_fault(u32 exception_code)
+{
+	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
+}
+
+const char *panthor_exception_name(struct panthor_device *ptdev,
+				   u32 exception_code);
+
+/**
+ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
+ * registration function.
+ *
+ * The boiler-plate to gracefully deal with shared interrupts is
+ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
+ * just after you actual handler. The handler prototype is:
+ *
+ * void (*handler)(struct panthor_device *, u32 status);
+ */
+#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
+static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
+{												\
+	struct panthor_irq *pirq = data;							\
+	struct panthor_device *ptdev = pirq->ptdev;						\
+												\
+	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
+		return IRQ_NONE;								\
+												\
+	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
+	return IRQ_WAKE_THREAD;									\
+}												\
+												\
+static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
+{												\
+	struct panthor_irq *pirq = data;							\
+	struct panthor_device *ptdev = pirq->ptdev;						\
+	irqreturn_t ret = IRQ_NONE;								\
+												\
+	while (true) {										\
+		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
+												\
+		if (!status)									\
+			break;									\
+												\
+		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
+												\
+		__handler(ptdev, status);							\
+		ret = IRQ_HANDLED;								\
+	}											\
+												\
+	if (!atomic_read(&pirq->suspended))							\
+		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
+												\
+	return ret;										\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
+{												\
+	int cookie;										\
+												\
+	atomic_set(&pirq->suspended, true);							\
+												\
+	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
+		synchronize_irq(pirq->irq);							\
+		drm_dev_exit(cookie);								\
+	}											\
+												\
+	pirq->mask = 0;										\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
+{												\
+	int cookie;										\
+												\
+	atomic_set(&pirq->suspended, false);							\
+	pirq->mask = mask;									\
+												\
+	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
+		drm_dev_exit(cookie);								\
+	}											\
+}												\
+												\
+static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
+					      struct panthor_irq *pirq,				\
+					      int irq, u32 mask)				\
+{												\
+	pirq->ptdev = ptdev;									\
+	pirq->irq = irq;									\
+	panthor_ ## __name ## _irq_resume(pirq, mask);						\
+												\
+	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
+					 panthor_ ## __name ## _irq_raw_handler,		\
+					 panthor_ ## __name ## _irq_threaded_handler,		\
+					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
+					 pirq);							\
+}
+
+extern struct workqueue_struct *panthor_cleanup_wq;
+
+#endif