diff mbox series

[v5,08/17] drm/imagination: Add GEM and VM related code

Message ID 20230816082531.164695-9-sarah.walker@imgtec.com (mailing list archive)
State New, archived
Headers show
Series Imagination Technologies PowerVR DRM driver | expand

Commit Message

Sarah Walker Aug. 16, 2023, 8:25 a.m. UTC
Add a GEM implementation based on drm_gem_shmem, and support code for the
PowerVR GPU MMU. The GPU VA manager is used for address space management.

Changes since v4:
- Correct sync function in vmap/vunmap function documentation
- Update for upstream GPU VA manager
- Fix missing frees when unmapping drm_gpuva objects
- Always zero GEM BOs on creation

Changes since v3:
- Split MMU and VM code
- Register page table allocations with kmemleak
- Use drm_dev_{enter,exit}

Changes since v2:
- Use GPU VA manager
- Use drm_gem_shmem

Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
---
 drivers/gpu/drm/imagination/Makefile     |    5 +-
 drivers/gpu/drm/imagination/pvr_device.c |   23 +-
 drivers/gpu/drm/imagination/pvr_device.h |   18 +
 drivers/gpu/drm/imagination/pvr_drv.c    |  302 ++-
 drivers/gpu/drm/imagination/pvr_gem.c    |  396 ++++
 drivers/gpu/drm/imagination/pvr_gem.h    |  177 ++
 drivers/gpu/drm/imagination/pvr_mmu.c    | 2487 ++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_mmu.h    |  108 +
 drivers/gpu/drm/imagination/pvr_vm.c     |  890 ++++++++
 drivers/gpu/drm/imagination/pvr_vm.h     |   60 +
 10 files changed, 4455 insertions(+), 11 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h

Comments

Jann Horn Aug. 17, 2023, 10:42 p.m. UTC | #1
Hi!

Thanks, I think it's great that Imagination is writing an upstream
driver for PowerVR. :)

On Wed, Aug 16, 2023 at 10:25 AM Sarah Walker <sarah.walker@imgtec.com> wrote:
> Add a GEM implementation based on drm_gem_shmem, and support code for the
> PowerVR GPU MMU. The GPU VA manager is used for address space management.
[...]
> +/**
> + * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only)
> + *
> + * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options
> + * for this field are specified in the UAPI header "pvr_drm.h" with a
> + * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist
> + * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix.
> + * The public options should be used directly, DRM prefix and all.
> + *
> + * To avoid potentially confusing gaps in the UAPI options, these kernel-only
> + * options are specified "in reverse", starting at bit 63.
> + *
> + * We use "reserved" to refer to bits defined here and not exposed in the UAPI.
> + * Bits not defined anywhere are "undefined".
> + *
> + * Creation options
> + *    These use the prefix PVR_BO_CREATE_.
> + *
> + *    *There are currently no kernel-only flags in this group.*
> + *
> + * Device mapping options
> + *    These use the prefix PVR_BO_DEVICE_.
> + *
> + *    *There are currently no kernel-only flags in this group.*
> + *
> + * CPU mapping options
> + *    These use the prefix PVR_BO_CPU_.
> + *
> + *    :CACHED: By default, all GEM objects are mapped write-combined on the
> + *       CPU. Set this flag to override this behaviour and map the object
> + *       cached.
> + */
> +#define PVR_BO_CPU_CACHED BIT_ULL(63)
> +
> +#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62)
> +
> +/* Bits 62..3 are undefined. */
> +/* Bits 2..0 are defined in the UAPI. */
> +
> +/* Other utilities. */
> +#define PVR_BO_UNDEFINED_MASK GENMASK_ULL(61, 3)
> +#define PVR_BO_RESERVED_MASK (PVR_BO_UNDEFINED_MASK | GENMASK_ULL(63, 63))

In commit 1a9c568fb559 ("drm/imagination: Rework firmware object
initialisation") in powervr-next, PVR_BO_FW_NO_CLEAR_ON_RESET (bit 62)
was added in the kernel-only flags group, but the mask
PVR_BO_RESERVED_MASK (which is used in pvr_ioctl_create_bo to detect
kernel-only and reserved flags) looks like it wasn't changed to
include bit 62. I think that means it becomes possible for userspace
to pass this bit in via pvr_ioctl_create_bo()?
If my understanding is correct and that was unintentional, it might be
a good idea to change these defines:

#define PVR_BO_UNDEFINED_MASK GENMASK_ULL(61, 3)
#define PVR_BO_RESERVED_MASK (PVR_BO_UNDEFINED_MASK | GENMASK_ULL(63, 63))

into something like this to avoid similar mishaps in the future:

/* first bit that is not used for UAPI BO options */
#define PVR_BO_FIRST_RESERVED_BIT 3
#define PVR_BO_UNDEFINED_MASK GENMASK_ULL(61, PVR_BO_FIRST_RESERVED_BIT)
#define PVR_BO_RESERVED_MASK GENMASK_ULL(63, PVR_BO_FIRST_RESERVED_BIT)

> +
> +/*
> + * All firmware-mapped memory uses (mostly) the same flags. Specifically,
> + * firmware-mapped memory should be:
> + *  * Read/write on the device,
> + *  * Read/write on the CPU, and
> + *  * Write-combined on the CPU.
> + *
> + * The only variation is in caching on the device.
> + */
> +#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0))
> +#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_DEVICE_BYPASS_CACHE
[...]
> +/**
> + * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
> + *                                     page table.
> + * @entry: Target raw level 2 page table entry.
> + * @child_table_dma_addr: DMA address of the level 1 page table to be
> + *                        associated with @entry.
> + *
> + * When calling this function, @child_table_dma_addr must be a valid DMA
> + * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
> + */
> +static void
> +pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
> +                               dma_addr_t child_table_dma_addr)
> +{
> +       child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
> +
> +       entry->val =
> +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
> +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
> +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr);
> +}

For this function and others that manipulate page table entries,
please use some kernel helper that ensures that the store can't tear
(at least WRITE_ONCE() - that can still tear on 32-bit, but I see the
driver depends on ARM64, so that's not a problem).
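
For illustration, a minimal sketch of what I mean for this particular
function (same code as in the patch, just with the final store done
through WRITE_ONCE() so the compiler can't split it up):

static void
pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
                                dma_addr_t child_table_dma_addr)
{
        child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;

        /* Publish the entry with a single, non-tearing 64-bit store. */
        WRITE_ONCE(entry->val,
                   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
                   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
                   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE,
                                             child_table_dma_addr));
}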

[...]
> +/**
> + * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
> + * table into a level 2 page table.
> + * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
> + * table into.
> + * @child_table: Target level 1 page table to be referenced by the new entry.
> + *
> + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
> + * valid L2 entry.
> + */
> +static void
> +pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
> +                        struct pvr_page_table_l1 *child_table)
> +{
> +       struct pvr_page_table_l2 *l2_table =
> +               &op_ctx->mmu_ctx->page_table_l2;
> +       struct pvr_page_table_l2_entry_raw *entry_raw =
> +               pvr_page_table_l2_get_entry_raw(l2_table,
> +                                               op_ctx->curr_page.l2_idx);
> +
> +       pvr_page_table_l2_entry_raw_set(entry_raw,
> +                                       child_table->backing_page.dma_addr);

Can you maybe add comments in functions that set page table entries to
document who is responsible for using a memory barrier (like wmb()) to
ensure that the creation of a page table entry is ordered after the
thing it points to is fully initialized, so that the GPU can't end up
concurrently walking into a page table and observing its old contents
from before it was zero-initialized?
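
For example, something like this at the call site (just a sketch to
illustrate the kind of comment I have in mind - whether dma_wmb() is
sufficient here, or a cache flush is also needed, depends on how the
page table memory is mapped):

        /*
         * Ensure the child table's zero-initialisation is visible to
         * the device before the L2 entry that publishes it, so a
         * concurrent GPU page table walk can't observe stale contents.
         */
        dma_wmb();
        pvr_page_table_l2_entry_raw_set(entry_raw,
                                        child_table->backing_page.dma_addr);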

> +
> +       child_table->parent = l2_table;
> +       child_table->parent_idx = op_ctx->curr_page.l2_idx;
> +       l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
> +       ++l2_table->entry_count;
> +       op_ctx->curr_page.l1_table = child_table;
> +}
[...]
> +/**
> + * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
> + * necessary) a level 1 page table from the specified level 2 page table entry.
> + * @op_ctx: Target MMU op context.
> + * @should_insert: [IN] Specifies whether new page tables should be inserted
> + * when empty page table entries are encountered during traversal.
> + *
> + * Return:
> + *  * 0 on success, or
> + *
> + *    If @should_insert is %false:
> + *     * -%ENXIO if a level 1 page table would have been inserted.
> + *
> + *    If @should_insert is %true:
> + *     * Any error encountered while inserting the level 1 page table.
> + */
> +static int
> +pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
> +                               bool should_insert)
> +{
> +       struct pvr_page_table_l2 *l2_table =
> +               &op_ctx->mmu_ctx->page_table_l2;
> +       struct pvr_page_table_l1 *table;
> +       int err;
> +
> +       if (pvr_page_table_l2_entry_is_valid(l2_table,
> +                                            op_ctx->curr_page.l2_idx)) {
> +               op_ctx->curr_page.l1_table =
> +                       l2_table->entries[op_ctx->curr_page.l2_idx];
> +               return 0;
> +       }
> +
> +       if (!should_insert)
> +               return -ENXIO;
> +
> +       /* Take a prealloced table. */
> +       table = op_ctx->l1_free_tables;
> +       if (!table)
> +               return -ENOMEM;
> +
> +       err = pvr_page_table_l1_init(table, op_ctx->mmu_ctx->pvr_dev);

I think when we have a preallocated table here, it was allocated in
pvr_page_table_l1_alloc(), which already called
pvr_page_table_l1_init()? So it looks to me like this second
pvr_page_table_l1_init() call will allocate another page and leak the
old allocation.
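
I.e. I'd expect the branch that takes a preallocated table to look
roughly like this (sketch only), without the second init call:

        /*
         * Take a prealloced table; it was already initialised by
         * pvr_page_table_l1_alloc().
         */
        table = op_ctx->l1_free_tables;
        if (!table)
                return -ENOMEM;

        /* Pop */
        op_ctx->l1_free_tables = table->next_free;
        table->next_free = NULL;

        pvr_page_table_l2_insert(op_ctx, table);

        return 0;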

> +       if (err)
> +               return err;
> +
> +       /* Pop */
> +       op_ctx->l1_free_tables = table->next_free;
> +       table->next_free = NULL;
> +
> +       pvr_page_table_l2_insert(op_ctx, table);
> +
> +       return 0;
> +}
[...]
> +/**
> + * pvr_mmu_op_context_create() - Create an MMU op context.
> + * @ctx: MMU context associated with owning VM context.
> + * @sgt: Scatter gather table containing pages pinned for use by this context.
> + * @sgt_offset: Start offset of the requested device-virtual memory mapping.
> + * @size: Size in bytes of the requested device-virtual memory mapping. For an
> + * unmapping, this should be zero so that no page tables are allocated.
> + *
> + * Returns:
> + *  * Newly created MMU op context object on success, or
> + *  * -%ENOMEM if no memory is available,
> + *  * Any error code returned by pvr_page_table_l2_init().
> + */
> +struct pvr_mmu_op_context *
> +pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
> +                         u64 sgt_offset, u64 size)
> +{
> +       int err;
> +
> +       struct pvr_mmu_op_context *op_ctx =
> +               kzalloc(sizeof(*op_ctx), GFP_KERNEL);
> +
> +       if (!op_ctx)
> +               return ERR_PTR(-ENOMEM);
> +
> +       op_ctx->mmu_ctx = ctx;
> +       op_ctx->map.sgt = sgt;
> +       op_ctx->map.sgt_offset = sgt_offset;
> +       op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
> +
> +       if (size) {
> +               const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
> +               const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
> +               const u32 l1_count = l1_end_idx - l1_start_idx + 1;
> +               const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
> +               const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
> +               const u32 l0_count = l0_end_idx - l0_start_idx + 1;

Shouldn't the page table indices be calculated from the device_addr
(which is not currently passed in by pvr_vm_map())? As far as I can
tell, sgt_offset doesn't have anything to do with the device address
at which this mapping will be inserted in the page tables?

> +
> +               /*
> +                * Alloc and push page table entries until we have enough of
> +                * each type, ending with linked lists of l0 and l1 entries in
> +                * reverse order.
> +                */
> +               for (int i = 0; i < l1_count; i++) {
> +                       struct pvr_page_table_l1 *l1_tmp =
> +                               pvr_page_table_l1_alloc(ctx);
> +
> +                       err = PTR_ERR_OR_ZERO(l1_tmp);
> +                       if (err)
> +                               goto err_cleanup;
> +
> +                       l1_tmp->next_free = op_ctx->l1_free_tables;
> +                       op_ctx->l1_free_tables = l1_tmp;
> +               }
> +
> +               for (int i = 0; i < l0_count; i++) {
> +                       struct pvr_page_table_l0 *l0_tmp =
> +                               pvr_page_table_l0_alloc(ctx);
> +
> +                       err = PTR_ERR_OR_ZERO(l0_tmp);
> +                       if (err)
> +                               goto err_cleanup;
> +
> +                       l0_tmp->next_free = op_ctx->l0_free_tables;
> +                       op_ctx->l0_free_tables = l0_tmp;
> +               }
> +       }
> +
> +       return op_ctx;
> +
> +err_cleanup:
> +       pvr_mmu_op_context_destroy(op_ctx);

> +
> +       return ERR_PTR(err);
> +}
Sarah Walker Aug. 18, 2023, 2:19 p.m. UTC | #2
On Fri, 2023-08-18 at 00:42 +0200, Jann Horn wrote:
> 
> Hi!
> 
> Thanks, I think it's great that Imagination is writing an upstream
> driver for PowerVR. :)
> 
> On Wed, Aug 16, 2023 at 10:25 AM Sarah Walker <sarah.walker@imgtec.com> wrote:
> > +#define PVR_BO_CPU_CACHED BIT_ULL(63)
> > +
> > +#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62)
> > +
> > +/* Bits 62..3 are undefined. */
> > +/* Bits 2..0 are defined in the UAPI. */
> > +
> > +/* Other utilities. */
> > +#define PVR_BO_UNDEFINED_MASK GENMASK_ULL(61, 3)
> > +#define PVR_BO_RESERVED_MASK (PVR_BO_UNDEFINED_MASK | GENMASK_ULL(63, 63))
> 
> In commit 1a9c568fb559 ("drm/imagination: Rework firmware object
> initialisation") in powervr-next, PVR_BO_FW_NO_CLEAR_ON_RESET (bit 62)
> was added in the kernel-only flags group, but the mask
> PVR_BO_RESERVED_MASK (which is used in pvr_ioctl_create_bo to detect
> kernel-only and reserved flags) looks like it wasn't changed to
> include bit 62. I think that means it becomes possible for userspace
> to pass this bit in via pvr_ioctl_create_bo()?

Yes, this is a bug. Will fix (and refactor a bit).

> > +/**
> > + * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
> > + *                                     page table.
> > + * @entry: Target raw level 2 page table entry.
> > + * @child_table_dma_addr: DMA address of the level 1 page table to be
> > + *                        associated with @entry.
> > + *
> > + * When calling this function, @child_table_dma_addr must be a valid DMA
> > + * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
> > + */
> > +static void
> > +pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
> > +                               dma_addr_t child_table_dma_addr)
> > +{
> > +       child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
> > +
> > +       entry->val =
> > +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
> > +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
> > +               PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr);
> > +}
> 
> For this function and others that manipulate page table entries,
> please use some kernel helper that ensures that the store can't tear
> (at least WRITE_ONCE() - that can still tear on 32-bit, but I see the
> driver depends on ARM64, so that's not a problem).

Will do.

> > +/**
> > + * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
> > + * table into a level 2 page table.
> > + * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
> > + * table into.
> > + * @child_table: Target level 1 page table to be referenced by the new entry.
> > + *
> > + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
> > + * valid L2 entry.
> > + */
> > +static void
> > +pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
> > +                        struct pvr_page_table_l1 *child_table)
> > +{
> > +       struct pvr_page_table_l2 *l2_table =
> > +               &op_ctx->mmu_ctx->page_table_l2;
> > +       struct pvr_page_table_l2_entry_raw *entry_raw =
> > +               pvr_page_table_l2_get_entry_raw(l2_table,
> > +                                               op_ctx->curr_page.l2_idx);
> > +
> > +       pvr_page_table_l2_entry_raw_set(entry_raw,
> > +                                       child_table->backing_page.dma_addr);
> 
> Can you maybe add comments in functions that set page table entries to
> document who is responsible for using a memory barrier (like wmb()) to
> ensure that the creation of a page table entry is ordered after the
> thing it points to is fully initialized, so that the GPU can't end up
> concurrently walking into a page table and observing its old contents
> from before it was zero-initialized?

Will do.

> > +static int
> > +pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
> > +                               bool should_insert)
> > +{
> > +       struct pvr_page_table_l2 *l2_table =
> > +               &op_ctx->mmu_ctx->page_table_l2;
> > +       struct pvr_page_table_l1 *table;
> > +       int err;
> > +
> > +       if (pvr_page_table_l2_entry_is_valid(l2_table,
> > +                                            op_ctx->curr_page.l2_idx)) {
> > +               op_ctx->curr_page.l1_table =
> > +                       l2_table->entries[op_ctx->curr_page.l2_idx];
> > +               return 0;
> > +       }
> > +
> > +       if (!should_insert)
> > +               return -ENXIO;
> > +
> > +       /* Take a prealloced table. */
> > +       table = op_ctx->l1_free_tables;
> > +       if (!table)
> > +               return -ENOMEM;
> > +
> > +       err = pvr_page_table_l1_init(table, op_ctx->mmu_ctx->pvr_dev);
> 
> I think when we have a preallocated table here, it was allocated in
> pvr_page_table_l1_alloc(), which already called
> pvr_page_table_l1_init()? So it looks to me like this second
> pvr_page_table_l1_init() call will allocate another page and leak the
> old allocation.

Yes, this is also a bug. Will address.

> > +/**
> > + * pvr_mmu_op_context_create() - Create an MMU op context.
> > + * @ctx: MMU context associated with owning VM context.
> > + * @sgt: Scatter gather table containing pages pinned for use by this context.
> > + * @sgt_offset: Start offset of the requested device-virtual memory mapping.
> > + * @size: Size in bytes of the requested device-virtual memory mapping. For an
> > + * unmapping, this should be zero so that no page tables are allocated.
> > + *
> > + * Returns:
> > + *  * Newly created MMU op context object on success, or
> > + *  * -%ENOMEM if no memory is available,
> > + *  * Any error code returned by pvr_page_table_l2_init().
> > + */
> > +struct pvr_mmu_op_context *
> > +pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
> > +                         u64 sgt_offset, u64 size)
> > +{
> > +       int err;
> > +
> > +       struct pvr_mmu_op_context *op_ctx =
> > +               kzalloc(sizeof(*op_ctx), GFP_KERNEL);
> > +
> > +       if (!op_ctx)
> > +               return ERR_PTR(-ENOMEM);
> > +
> > +       op_ctx->mmu_ctx = ctx;
> > +       op_ctx->map.sgt = sgt;
> > +       op_ctx->map.sgt_offset = sgt_offset;
> > +       op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
> > +
> > +       if (size) {
> > +               const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
> > +               const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
> > +               const u32 l1_count = l1_end_idx - l1_start_idx + 1;
> > +               const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
> > +               const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
> > +               const u32 l0_count = l0_end_idx - l0_start_idx + 1;
> 
> Shouldn't the page table indices be calculated from the device_addr
> (which is not currently passed in by pvr_vm_map())? As far as I can
> tell, sgt_offset doesn't have anything to do with the device address
> at which this mapping will be inserted in the page tables?

This code is correct, but badly documented; this function only cares about the
number of l0/l1 page tables required, not the address. Will improve the
documentation to make it less confusing.

Thanks,
Sarah
Danilo Krummrich Aug. 18, 2023, 3:30 p.m. UTC | #3
Hi Sarah,

On Wed, Aug 16, 2023 at 09:25:23AM +0100, Sarah Walker wrote:
> Add a GEM implementation based on drm_gem_shmem, and support code for the
> PowerVR GPU MMU. The GPU VA manager is used for address space management.
> 
> Changes since v4:
> - Correct sync function in vmap/vunmap function documentation
> - Update for upstream GPU VA manager
> - Fix missing frees when unmapping drm_gpuva objects
> - Always zero GEM BOs on creation
> 
> Changes since v3:
> - Split MMU and VM code
> - Register page table allocations with kmemleak
> - Use drm_dev_{enter,exit}
> 
> Changes since v2:
> - Use GPU VA manager
> - Use drm_gem_shmem
> 
> Co-developed-by: Matt Coster <matt.coster@imgtec.com>
> Signed-off-by: Matt Coster <matt.coster@imgtec.com>
> Co-developed-by: Donald Robson <donald.robson@imgtec.com>
> Signed-off-by: Donald Robson <donald.robson@imgtec.com>
> Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
> ---
>  drivers/gpu/drm/imagination/Makefile     |    5 +-
>  drivers/gpu/drm/imagination/pvr_device.c |   23 +-
>  drivers/gpu/drm/imagination/pvr_device.h |   18 +
>  drivers/gpu/drm/imagination/pvr_drv.c    |  302 ++-
>  drivers/gpu/drm/imagination/pvr_gem.c    |  396 ++++
>  drivers/gpu/drm/imagination/pvr_gem.h    |  177 ++
>  drivers/gpu/drm/imagination/pvr_mmu.c    | 2487 ++++++++++++++++++++++
>  drivers/gpu/drm/imagination/pvr_mmu.h    |  108 +
>  drivers/gpu/drm/imagination/pvr_vm.c     |  890 ++++++++
>  drivers/gpu/drm/imagination/pvr_vm.h     |   60 +
>  10 files changed, 4455 insertions(+), 11 deletions(-)
>  create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c
>  create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h
>  create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c
>  create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h
>  create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c
>  create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h

<snip>

> diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
> new file mode 100644
> index 000000000000..616fad3a3325
> --- /dev/null
> +++ b/drivers/gpu/drm/imagination/pvr_vm.c
> @@ -0,0 +1,890 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> +
> +#include "pvr_vm.h"
> +
> +#include "pvr_device.h"
> +#include "pvr_drv.h"
> +#include "pvr_gem.h"
> +#include "pvr_mmu.h"
> +#include "pvr_rogue_fwif.h"
> +#include "pvr_rogue_heap_config.h"
> +
> +#include <drm/drm_gem.h>
> +#include <drm/drm_gpuva_mgr.h>
> +
> +#include <linux/container_of.h>
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +#include <linux/gfp_types.h>
> +#include <linux/kref.h>
> +#include <linux/mutex.h>
> +#include <linux/stddef.h>
> +
> +/**
> + * DOC: Memory context
> + *
> + * This is the "top level" datatype in the VM code. It's exposed in the public
> + * API as an opaque handle.
> + */
> +
> +/**
> + * struct pvr_vm_context - Context type which encapsulates an entire page table
> + * tree structure.
> + * @pvr_dev: The PowerVR device to which this context is bound.
> + *
> + * This binding is immutable for the life of the context.
> + * @mmu_ctx: The context for binding to physical memory.
> + * @gpuva_mgr: GPUVA manager object associated with this context.
> + * @lock: Global lock on this entire structure of page tables.
> + * @fw_mem_ctx_obj: Firmware object representing firmware memory context.
> + * @ref_count: Reference count of object.
> + */
> +struct pvr_vm_context {
> +	struct pvr_device *pvr_dev;
> +	struct pvr_mmu_context *mmu_ctx;
> +	struct drm_gpuva_manager gpuva_mgr;
> +	struct mutex lock;
> +	struct pvr_fw_object *fw_mem_ctx_obj;
> +	struct kref ref_count;
> +};
> +
> +/**
> + * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
> + *                                     page table structure behind a VM context.
> + * @vm_ctx: Target VM context.
> + */
> +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
> +{
> +	return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
> +}
> +
> +/**
> + * DOC: Memory mappings
> + */
> +
> +/**
> + * pvr_vm_gpuva_mapping_init() - Setup a mapping object with the specified
> + * parameters ready for mapping using pvr_vm_gpuva_mapping_map().
> + * @va: Pointer to drm_gpuva mapping object.
> + * @device_addr: Device-virtual address at the start of the mapping.
> + * @size: Size of the desired mapping.
> + * @pvr_obj: Target PowerVR memory object.
> + * @pvr_obj_offset: Offset into @pvr_obj to begin mapping from.
> + *
> + * Some parameters of this function are unchecked. It is therefore the callers
> + * responsibility to ensure certain constraints are met. Specifically:
> + *
> + * * @pvr_obj_offset must be less than the size of @pvr_obj,
> + * * The sum of @pvr_obj_offset and @size must be less than or equal to the
> + *   size of @pvr_obj,
> + * * The range specified by @pvr_obj_offset and @size (the "CPU range") must be
> + *   CPU page-aligned both in start position and size, and
> + * * The range specified by @device_addr and @size (the "device range") must be
> + *   device page-aligned both in start position and size.
> + *
> + * Furthermore, it is up to the caller to make sure that a reference to @pvr_obj
> + * is taken prior to mapping @va with the drm_gpuva_manager.
> + */
> +static void
> +pvr_vm_gpuva_mapping_init(struct drm_gpuva *va, u64 device_addr, u64 size,
> +			  struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset)

There's already drm_gpuva_init() doing the same thing.

> +{
> +	va->va.addr = device_addr;
> +	va->va.range = size;
> +	va->gem.obj = gem_from_pvr_gem(pvr_obj);
> +	va->gem.offset = pvr_obj_offset;
> +}
> +
> +struct pvr_vm_gpuva_op_ctx {
> +	struct pvr_vm_context *vm_ctx;
> +	struct pvr_mmu_op_context *mmu_op_ctx;
> +	struct drm_gpuva *new_va, *prev_va, *next_va;
> +};
> +
> +/**
> + * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
> + * @op: gpuva op containing the remap details.
> + * @op_ctx: Operation context.
> + *
> + * Context: Called by drm_gpuva_sm_map following a successful mapping while
> + * @op_ctx.vm_ctx mutex is held.
> + *
> + * Return:
> + *  * 0 on success, or
> + *  * Any error returned by pvr_mmu_map().
> + */
> +static int
> +pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
> +{
> +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> +	int err;
> +
> +	if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
> +		return -EINVAL;
> +
> +	err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
> +			  op->map.va.addr);
> +	if (err)
> +		return err;
> +
> +	pvr_vm_gpuva_mapping_init(ctx->new_va, op->map.va.addr,
> +				  op->map.va.range, pvr_gem, op->map.gem.offset);
> +
> +	drm_gpuva_map(&ctx->vm_ctx->gpuva_mgr, ctx->new_va, &op->map);

drm_gpuva_map() does use drm_gpuva_init_from_op() internally, hence the extra
call to pvr_vm_gpuva_mapping_init() should be unnecessary.

> +	drm_gpuva_link(ctx->new_va);

How is this protected?

drm_gpuva_link() and drm_gpuva_unlink() require either the dma_resv lock of the
corresponding GEM object being held or, alternatively, the driver specific lock
indicated via drm_gem_gpuva_set_lock().
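
E.g. a rough sketch of the dma_resv variant (with a NULL ww_acquire_ctx
dma_resv_lock() can't fail, so the return value can be ignored here):

	dma_resv_lock(gem_from_pvr_gem(pvr_gem)->resv, NULL);
	drm_gpuva_link(ctx->new_va);
	dma_resv_unlock(gem_from_pvr_gem(pvr_gem)->resv);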

> +	ctx->new_va = NULL;
> +
> +	/*
> +	 * Increment the refcount on the underlying physical memory resource
> +	 * to prevent de-allocation while the mapping exists.
> +	 */
> +	pvr_gem_object_get(pvr_gem);
> +
> +	return 0;
> +}
> +
> +/**
> + * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
> + * @op: gpuva op containing the unmap details.
> + * @op_ctx: Operation context.
> + *
> + * Context: Called by drm_gpuva_sm_unmap following a successful unmapping while
> + * @op_ctx.vm_ctx mutex is held.
> + *
> + * Return:
> + *  * 0 on success, or
> + *  * Any error returned by pvr_mmu_unmap().
> + */
> +static int
> +pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
> +{
> +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->unmap.va->gem.obj);
> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> +
> +	int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
> +				op->unmap.va->va.range);
> +
> +	if (err)
> +		return err;
> +
> +	drm_gpuva_unmap(&op->unmap);
> +	drm_gpuva_unlink(op->unmap.va);
> +	kfree(op->unmap.va);
> +
> +	pvr_gem_object_put(pvr_gem);
> +
> +	return 0;
> +}
> +
> +/**
> + * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
> + * @op: gpuva op containing the remap details.
> + * @op_ctx: Operation context.
> + *
> + * Context: Called by either drm_gpuva_sm_map or drm_gpuva_sm_unmap when a
> + * mapping or unmapping operation causes a region to be split. The
> + * @op_ctx.vm_ctx mutex is held.
> + *
> + * Return:
> + *  * 0 on success, or
> + *  * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap().
> + */
> +static int
> +pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
> +{
> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> +
> +	if (op->remap.unmap) {

You can omit this check; remap operations always contain a valid unmap
operation. However, you might want to know whether the remap operation was
generated by a call to drm_gpuva_sm_map() or drm_gpuva_sm_unmap(), since for
the latter you might want to free page table structures.
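
Rough sketch of one way to carry that information (the field name is
made up): the driver-private op context could be extended, e.g.

	struct pvr_vm_gpuva_op_ctx {
		struct pvr_vm_context *vm_ctx;
		struct pvr_mmu_op_context *mmu_op_ctx;
		struct drm_gpuva *new_va, *prev_va, *next_va;
		/* Set by pvr_vm_unmap() before calling drm_gpuva_sm_unmap(). */
		bool is_unmap_op;
	};

and pvr_vm_gpuva_remap() could then check it to decide whether page
table structures need to be freed.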

> +		const u64 va_start = op->remap.prev ?
> +				     op->remap.prev->va.addr + op->remap.prev->va.range :
> +				     op->remap.unmap->va->va.addr;
> +		const u64 va_end = op->remap.next ?
> +				   op->remap.next->va.addr :
> +				   op->remap.unmap->va->va.addr + op->remap.unmap->va->va.range;

This seems to be a common calculation for drivers, so it is probably worth
coming up with a helper, something like
drm_gpuva_op_unmap_range(struct drm_gpuva_op *op, u64 *addr, u64 *range).
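
Rough sketch, assuming @op is always a remap operation (name and exact
form obviously up for discussion):

static inline void
drm_gpuva_op_unmap_range(struct drm_gpuva_op *op, u64 *addr, u64 *range)
{
	const u64 start = op->remap.prev ?
			  op->remap.prev->va.addr + op->remap.prev->va.range :
			  op->remap.unmap->va->va.addr;
	const u64 end = op->remap.next ?
			op->remap.next->va.addr :
			op->remap.unmap->va->va.addr + op->remap.unmap->va->va.range;

	*addr = start;
	*range = end - start;
}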

> +
> +		int err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start,
> +					va_end - va_start);
> +
> +		if (err)
> +			return err;
> +	}
> +
> +	if (op->remap.prev)
> +		pvr_vm_gpuva_mapping_init(ctx->prev_va, op->remap.prev->va.addr,
> +					  op->remap.prev->va.range,
> +					  gem_to_pvr_gem(op->remap.prev->gem.obj),
> +					  op->remap.prev->gem.offset);
> +
> +	if (op->remap.next)
> +		pvr_vm_gpuva_mapping_init(ctx->next_va, op->remap.next->va.addr,
> +					  op->remap.next->va.range,
> +					  gem_to_pvr_gem(op->remap.next->gem.obj),
> +					  op->remap.next->gem.offset);
> +
> +	/* No actual remap required: the page table tree depth is fixed to 3,
> +	 * and we use 4k page table entries only for now.
> +	 */
> +	drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap);

As above, drm_gpuva_remap() does use drm_gpuva_init_from_op() internally, hence
the extra call to pvr_vm_gpuva_mapping_init() should be unnecessary.

> +
> +	if (op->remap.prev) {
> +		pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->gem.obj));
> +		drm_gpuva_link(ctx->prev_va);
> +		ctx->prev_va = NULL;
> +	}
> +
> +	if (op->remap.next) {
> +		pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->gem.obj));
> +		drm_gpuva_link(ctx->next_va);
> +		ctx->next_va = NULL;
> +	}
> +
> +	if (op->remap.unmap) {

As above, no need for this check.

- Danilo

> +		struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->remap.unmap->va->gem.obj);
> +
> +		drm_gpuva_unlink(op->unmap.va);
> +		kfree(op->unmap.va);
> +
> +		pvr_gem_object_put(pvr_gem);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Public API
> + *
> + * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
> + */
> +
> +/**
> + * pvr_device_addr_is_valid() - Tests whether a device-virtual address
> + *                              is valid.
> + * @device_addr: Virtual device address to test.
> + *
> + * Return:
> + *  * %true if @device_addr is within the valid range for a device page
> + *    table and is aligned to the device page size, or
> + *  * %false otherwise.
> + */
> +bool
> +pvr_device_addr_is_valid(u64 device_addr)
> +{
> +	return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
> +	       (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
> +}
> +
> +/**
> + * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
> + * address and associated size are both valid.
> + * @device_addr: Virtual device address to test.
> + * @size: Size of the range based at @device_addr to test.
> + *
> + * Calling pvr_device_addr_is_valid() twice (once on @size, and again on
> + * @device_addr + @size) to verify a device-virtual address range initially
> + * seems intuitive, but it produces a false-negative when the address range
> + * is right at the end of device-virtual address space.
> + *
> + * This function catches that corner case, as well as checking that
> + * @size is non-zero.
> + *
> + * Return:
> + *  * %true if @device_addr is device page aligned; @size is device page
> + *    aligned; the range specified by @device_addr and @size is within the
> + *    bounds of the device-virtual address space, and @size is non-zero, or
> + *  * %false otherwise.
> + */
> +bool
> +pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
> +{
> +	return pvr_device_addr_is_valid(device_addr) &&
> +	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
> +	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
> +}
> +
> +static const struct drm_gpuva_fn_ops pvr_vm_gpuva_ops = {
> +	.sm_step_map = pvr_vm_gpuva_map,
> +	.sm_step_remap = pvr_vm_gpuva_remap,
> +	.sm_step_unmap = pvr_vm_gpuva_unmap,
> +};
> +
> +/**
> + * pvr_vm_create_context() - Create a new VM context.
> + * @pvr_dev: Target PowerVR device.
> + * @is_userspace_context: %true if this context is for userspace. This will
> + *                        create a firmware memory context for the VM context
> + *                        and disable warnings when tearing down mappings.
> + *
> + * Return:
> + *  * A handle to the newly-minted VM context on success,
> + *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
> + *    missing or has an unsupported value,
> + *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
> + *    or
> + *  * Any error encountered while setting up internal structures.
> + */
> +struct pvr_vm_context *
> +pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
> +{
> +	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
> +
> +	struct pvr_vm_context *vm_ctx;
> +	u16 device_addr_bits;
> +
> +	int err;
> +
> +	err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
> +				&device_addr_bits);
> +	if (err) {
> +		drm_err(drm_dev,
> +			"Failed to get device virtual address space bits\n");
> +		return ERR_PTR(err);
> +	}
> +
> +	if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
> +		drm_err(drm_dev,
> +			"Device has unsupported virtual address space size\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
> +	if (!vm_ctx)
> +		return ERR_PTR(-ENOMEM);
> +
> +	vm_ctx->pvr_dev = pvr_dev;
> +	kref_init(&vm_ctx->ref_count);
> +	mutex_init(&vm_ctx->lock);
> +
> +	drm_gpuva_manager_init(&vm_ctx->gpuva_mgr,
> +			       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
> +			       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
> +
> +	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
> +	err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
> +	if (err) {
> +		vm_ctx->mmu_ctx = NULL;
> +		goto err_put_ctx;
> +	}
> +
> +	if (is_userspace_context) {
> +		/* TODO: Create FW mem context */
> +		err = -ENODEV;
> +		goto err_put_ctx;
> +	}
> +
> +	return vm_ctx;
> +
> +err_put_ctx:
> +	pvr_vm_context_put(vm_ctx);
> +
> +	return ERR_PTR(err);
> +}
> +
> +/**
> + * pvr_vm_context_release() - Teardown a VM context.
> + * @ref_count: Pointer to reference counter of the VM context.
> + *
> + * This function ensures that no mappings are left dangling by unmapping them
> + * all in order of ascending device-virtual address.
> + */
> +static void
> +pvr_vm_context_release(struct kref *ref_count)
> +{
> +	struct pvr_vm_context *vm_ctx =
> +		container_of(ref_count, struct pvr_vm_context, ref_count);
> +
> +	/* TODO: Destroy FW mem context */
> +	WARN_ON(vm_ctx->fw_mem_ctx_obj);
> +
> +	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuva_mgr.mm_start,
> +			     vm_ctx->gpuva_mgr.mm_range));
> +
> +	drm_gpuva_manager_destroy(&vm_ctx->gpuva_mgr);
> +	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
> +	mutex_destroy(&vm_ctx->lock);
> +
> +	kfree(vm_ctx);
> +}
> +
> +/**
> + * pvr_vm_context_lookup() - Look up VM context from handle
> + * @pvr_file: Pointer to pvr_file structure.
> + * @handle: Object handle.
> + *
> + * Takes reference on VM context object. Call pvr_vm_context_put() to release.
> + *
> + * Returns:
> + *  * The requested object on success, or
> + *  * %NULL on failure (object does not exist in list, or is not a VM context)
> + */
> +struct pvr_vm_context *
> +pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
> +{
> +	struct pvr_vm_context *vm_ctx;
> +
> +	xa_lock(&pvr_file->vm_ctx_handles);
> +	vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
> +	if (vm_ctx)
> +		kref_get(&vm_ctx->ref_count);
> +
> +	xa_unlock(&pvr_file->vm_ctx_handles);
> +
> +	return vm_ctx;
> +}
> +
> +/**
> + * pvr_vm_context_put() - Release a reference on a VM context
> + * @vm_ctx: Target VM context.
> + *
> + * Returns:
> + *  * %true if the VM context was destroyed, or
> + *  * %false if there are any references still remaining.
> + */
> +bool
> +pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
> +{
> +	WARN_ON(!vm_ctx);
> +
> +	if (vm_ctx)
> +		return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
> +
> +	return true;
> +}
> +
> +/**
> + * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the
> + * given file.
> + * @pvr_file: Pointer to pvr_file structure.
> + *
> + * Removes all vm_contexts associated with @pvr_file from the device VM context
> + * list and drops initial references. vm_contexts will then be destroyed once
> + * all outstanding references are dropped.
> + */
> +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
> +{
> +	struct pvr_vm_context *vm_ctx;
> +	unsigned long handle;
> +
> +	xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
> +		/* vm_ctx is not used here because that would create a race with xa_erase */
> +		pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
> +	}
> +}
> +
> +/**
> + * pvr_vm_map() - Map a section of physical memory into a section of device-virtual memory.
> + * @vm_ctx: Target VM context.
> + * @pvr_obj: Target PowerVR memory object.
> + * @pvr_obj_offset: Offset into @pvr_obj to map from.
> + * @device_addr: Virtual device address at the start of the requested mapping.
> + * @size: Size of the requested mapping.
> + *
> + * No handle is returned to represent the mapping. Instead, callers should
> + * remember @device_addr and use that as a handle.
> + *
> + * Return:
> + *  * 0 on success,
> + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
> + *    address; the region specified by @pvr_obj_offset and @size does not fall
> + *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
> + *    is not device-virtual page-aligned,
> + *  * Any error encountered while performing internal operations required to
> + *    destroy the mapping (returned from pvr_vm_gpuva_map or
> + *    pvr_vm_gpuva_remap).
> + */
> +int
> +pvr_vm_map(struct pvr_vm_context *vm_ctx,
> +	   struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
> +	   u64 device_addr, u64 size)
> +{
> +	const size_t pvr_obj_size = pvr_gem_object_size(pvr_obj);
> +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
> +	struct sg_table *sgt;
> +	int err;
> +
> +	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
> +	    pvr_obj_offset & ~PAGE_MASK || size & ~PAGE_MASK ||
> +	    pvr_obj_offset + size > pvr_obj_size ||
> +	    pvr_obj_offset > pvr_obj_size) {
> +		return -EINVAL;
> +	}
> +
> +	op_ctx.new_va = kzalloc(sizeof(*op_ctx.new_va), GFP_KERNEL);
> +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
> +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
> +	if (!op_ctx.new_va || !op_ctx.prev_va || !op_ctx.next_va) {
> +		err = -ENOMEM;
> +		goto out_free;
> +	}
> +
> +	sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
> +	err = PTR_ERR_OR_ZERO(sgt);
> +	if (err)
> +		goto out_free;
> +
> +	op_ctx.mmu_op_ctx = pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt,
> +						      pvr_obj_offset, size);
> +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
> +	if (err) {
> +		op_ctx.mmu_op_ctx = NULL;
> +		goto out_mmu_op_ctx_destroy;
> +	}
> +
> +	mutex_lock(&vm_ctx->lock);
> +	err = drm_gpuva_sm_map(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size,
> +			       gem_from_pvr_gem(pvr_obj), pvr_obj_offset);
> +	mutex_unlock(&vm_ctx->lock);
> +
> +out_mmu_op_ctx_destroy:
> +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
> +
> +out_free:
> +	kfree(op_ctx.next_va);
> +	kfree(op_ctx.prev_va);
> +	kfree(op_ctx.new_va);
> +
> +	return err;
> +}
> +
> +/**
> + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
> + * @vm_ctx: Target VM context.
> + * @device_addr: Virtual device address at the start of the target mapping.
> + * @size: Size of the target mapping.
> + *
> + * Return:
> + *  * 0 on success,
> + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
> + *    address,
> + *  * Any error encountered while performing internal operations required to
> + *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
> + *    pvr_vm_gpuva_remap).
> + */
> +int
> +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
> +{
> +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
> +	int err;
> +
> +	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
> +		return -EINVAL;
> +
> +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
> +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
> +	if (!op_ctx.prev_va || !op_ctx.next_va) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	op_ctx.mmu_op_ctx =
> +		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
> +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
> +	if (err) {
> +		op_ctx.mmu_op_ctx = NULL;
> +		goto out;
> +	}
> +
> +	mutex_lock(&vm_ctx->lock);
> +	err = drm_gpuva_sm_unmap(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size);
> +	mutex_unlock(&vm_ctx->lock);
> +
> +out:
> +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
> +	kfree(op_ctx.next_va);
> +	kfree(op_ctx.prev_va);
> +
> +	return err;
> +}
> +
> +/*
> + * Static data areas are determined by firmware.
> + *
> + * When adding a new static data area you will also need to update the reserved_size field for the
> + * heap in pvr_heaps[].
> + */
> +static const struct drm_pvr_static_data_area static_data_areas[] = {
> +	{
> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
> +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
> +		.offset = 0,
> +		.size = 128,
> +	},
> +	{
> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
> +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
> +		.offset = 128,
> +		.size = 1024,
> +	},
> +	{
> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
> +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
> +		.offset = 0,
> +		.size = 128,
> +	},
> +	{
> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
> +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
> +		.offset = 128,
> +		.size = 128,
> +	},
> +	{
> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
> +		.location_heap_id = DRM_PVR_HEAP_USC_CODE,
> +		.offset = 0,
> +		.size = 128,
> +	},
> +};
> +
> +#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
> +
> +/*
> + * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
> + * static data area for each heap.
> + */
> +static const struct drm_pvr_heap pvr_heaps[] = {
> +	[DRM_PVR_HEAP_GENERAL] = {
> +		.base = ROGUE_GENERAL_HEAP_BASE,
> +		.size = ROGUE_GENERAL_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +	[DRM_PVR_HEAP_PDS_CODE_DATA] = {
> +		.base = ROGUE_PDSCODEDATA_HEAP_BASE,
> +		.size = ROGUE_PDSCODEDATA_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +	[DRM_PVR_HEAP_USC_CODE] = {
> +		.base = ROGUE_USCCODE_HEAP_BASE,
> +		.size = ROGUE_USCCODE_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +	[DRM_PVR_HEAP_RGNHDR] = {
> +		.base = ROGUE_RGNHDR_HEAP_BASE,
> +		.size = ROGUE_RGNHDR_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +	[DRM_PVR_HEAP_VIS_TEST] = {
> +		.base = ROGUE_VISTEST_HEAP_BASE,
> +		.size = ROGUE_VISTEST_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +	[DRM_PVR_HEAP_TRANSFER_FRAG] = {
> +		.base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
> +		.size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
> +		.flags = 0,
> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> +	},
> +};
> +
> +int
> +pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
> +			  struct drm_pvr_ioctl_dev_query_args *args)
> +{
> +	struct drm_pvr_dev_query_static_data_areas query = {0};
> +	int err;
> +
> +	if (!args->pointer) {
> +		args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
> +		return 0;
> +	}
> +
> +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
> +	if (err < 0)
> +		return err;
> +
> +	if (!query.static_data_areas.array) {
> +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
> +		query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
> +		goto copy_out;
> +	}
> +
> +	if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
> +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
> +
> +	err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
> +	if (err < 0)
> +		return err;
> +
> +copy_out:
> +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
> +	if (err < 0)
> +		return err;
> +
> +	args->size = sizeof(query);
> +	return 0;
> +}
> +
> +int
> +pvr_heap_info_get(const struct pvr_device *pvr_dev,
> +		  struct drm_pvr_ioctl_dev_query_args *args)
> +{
> +	struct drm_pvr_dev_query_heap_info query = {0};
> +	u64 dest;
> +	int err;
> +
> +	if (!args->pointer) {
> +		args->size = sizeof(struct drm_pvr_dev_query_heap_info);
> +		return 0;
> +	}
> +
> +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
> +	if (err < 0)
> +		return err;
> +
> +	if (!query.heaps.array) {
> +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
> +		query.heaps.stride = sizeof(struct drm_pvr_heap);
> +		goto copy_out;
> +	}
> +
> +	if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
> +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
> +
> +	/* Region header heap is only present if BRN63142 is present. */
> +	dest = query.heaps.array;
> +	for (size_t i = 0; i < query.heaps.count; i++) {
> +		struct drm_pvr_heap heap = pvr_heaps[i];
> +
> +		if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
> +			heap.size = 0;
> +
> +		err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
> +		if (err < 0)
> +			return err;
> +
> +		dest += query.heaps.stride;
> +	}
> +
> +copy_out:
> +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
> +	if (err < 0)
> +		return err;
> +
> +	args->size = sizeof(query);
> +	return 0;
> +}
> +
> +/**
> + * pvr_heap_contains_range() - Determine if a given heap contains the specified
> + *                             device-virtual address range.
> + * @pvr_heap: Target heap.
> + * @start: Inclusive start of the target range.
> + * @end: Inclusive end of the target range.
> + *
> + * It is an error to call this function with values of @start and @end that do
> + * not satisfy the condition @start <= @end.
> + */
> +static __always_inline bool
> +pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
> +{
> +	return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
> +}
> +
> +/**
> + * pvr_find_heap_containing() - Find a heap which contains the specified
> + *                              device-virtual address range.
> + * @pvr_dev: Target PowerVR device.
> + * @start: Start of the target range.
> + * @size: Size of the target range.
> + *
> + * Return:
> + *  * A pointer to a constant instance of struct drm_pvr_heap representing the
> + *    heap containing the entire range specified by @start and @size on
> + *    success, or
> + *  * %NULL if no such heap exists.
> + */
> +const struct drm_pvr_heap *
> +pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
> +{
> +	u64 end;
> +
> +	if (check_add_overflow(start, size - 1, &end))
> +		return NULL;
> +
> +	/*
> +	 * There are no guarantees about the order of address ranges in
> +	 * &pvr_heaps, so iterate over the entire array for a heap whose
> +	 * range completely encompasses the given range.
> +	 */
> +	for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
> +		/* Filter heaps that present only with an associated quirk */
> +		if (heap_id == DRM_PVR_HEAP_RGNHDR &&
> +		    !PVR_HAS_QUIRK(pvr_dev, 63142)) {
> +			continue;
> +		}
> +
> +		if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
> +			return &pvr_heaps[heap_id];
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * pvr_vm_find_gem_object() - Look up a buffer object from a given
> + *                            device-virtual address.
> + * @vm_ctx: [IN] Target VM context.
> + * @device_addr: [IN] Virtual device address at the start of the required
> + *               object.
> + * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
> + *                     of the mapped region within the buffer object. May be
> + *                     %NULL if this information is not required.
> + * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
> + *                   region. May be %NULL if this information is not required.
> + *
> + * If successful, a reference will be taken on the buffer object. The caller
> + * must drop the reference with pvr_gem_object_put().
> + *
> + * Return:
> + *  * The PowerVR buffer object mapped at @device_addr if one exists, or
> + *  * %NULL otherwise.
> + */
> +struct pvr_gem_object *
> +pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
> +		       u64 *mapped_offset_out, u64 *mapped_size_out)
> +{
> +	struct pvr_gem_object *pvr_obj;
> +	struct drm_gpuva *va;
> +
> +	mutex_lock(&vm_ctx->lock);
> +
> +	va = drm_gpuva_find_first(&vm_ctx->gpuva_mgr, device_addr, 1);
> +	if (!va)
> +		goto err_unlock;
> +
> +	pvr_obj = gem_to_pvr_gem(va->gem.obj);
> +	pvr_gem_object_get(pvr_obj);
> +
> +	if (mapped_offset_out)
> +		*mapped_offset_out = va->gem.offset;
> +	if (mapped_size_out)
> +		*mapped_size_out = va->va.range;
> +
> +	mutex_unlock(&vm_ctx->lock);
> +
> +	return pvr_obj;
> +
> +err_unlock:
> +	mutex_unlock(&vm_ctx->lock);
> +
> +	return NULL;
> +}
> +
> +/**
> + * pvr_vm_get_fw_mem_context: Get object representing firmware memory context
> + * @vm_ctx: Target VM context.
> + *
> + * Returns:
> + *  * FW object representing firmware memory context, or
> + *  * %NULL if this VM context does not have a firmware memory context.
> + */
> +struct pvr_fw_object *
> +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
> +{
> +	return vm_ctx->fw_mem_ctx_obj;
> +}
> diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
> new file mode 100644
> index 000000000000..b98bc3981807
> --- /dev/null
> +++ b/drivers/gpu/drm/imagination/pvr_vm.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> +
> +#ifndef PVR_VM_H
> +#define PVR_VM_H
> +
> +#include "pvr_rogue_mmu_defs.h"
> +
> +#include <uapi/drm/pvr_drm.h>
> +
> +#include <linux/types.h>
> +
> +/* Forward declaration from "pvr_device.h" */
> +struct pvr_device;
> +struct pvr_file;
> +
> +/* Forward declaration from "pvr_gem.h" */
> +struct pvr_gem_object;
> +
> +/* Forward declaration from "pvr_vm.c" */
> +struct pvr_vm_context;
> +
> +/* Forward declaration from <uapi/drm/pvr_drm.h> */
> +struct drm_pvr_ioctl_get_heap_info_args;
> +
> +/* Functions defined in pvr_vm.c */
> +
> +bool pvr_device_addr_is_valid(u64 device_addr);
> +bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
> +
> +struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
> +					     bool is_userspace_context);
> +
> +int pvr_vm_map(struct pvr_vm_context *vm_ctx,
> +	       struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
> +	       u64 device_addr, u64 size);
> +int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
> +
> +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
> +
> +int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
> +			      struct drm_pvr_ioctl_dev_query_args *args);
> +int pvr_heap_info_get(const struct pvr_device *pvr_dev,
> +		      struct drm_pvr_ioctl_dev_query_args *args);
> +const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
> +						    u64 addr, u64 size);
> +
> +struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
> +					      u64 device_addr,
> +					      u64 *mapped_offset_out,
> +					      u64 *mapped_size_out);
> +
> +struct pvr_fw_object *
> +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
> +
> +struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
> +bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
> +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
> +
> +#endif /* PVR_VM_H */
> -- 
> 2.41.0
>
Jann Horn Aug. 18, 2023, 10:59 p.m. UTC | #4
On Wed, Aug 16, 2023 at 10:25 AM Sarah Walker <sarah.walker@imgtec.com> wrote:
> Add a GEM implementation based on drm_gem_shmem, and support code for the
> PowerVR GPU MMU. The GPU VA manager is used for address space management.
[...]
> +/**
> + * pvr_mmu_flush() - Request flush of all MMU caches.
> + * @pvr_dev: Target PowerVR device.
> + *
> + * This function must be called following any possible change to the MMU page
> + * tables.
> + *
> + * Returns:
> + *  * 0 on success, or
> + *  * Any error encountered while submitting the flush command via the KCCB.
> + */
> +int
> +pvr_mmu_flush(struct pvr_device *pvr_dev)
> +{
> +       /* TODO: implement */
> +       return -ENODEV;
> +}

pvr_mmu_flush() being an operation that can fail looks dodgy to me,
especially given that a later patch implements pvr_mmu_flush() such
that it looks like it can hit a transient failure on the path:

pvr_mmu_flush
  pvr_kccb_send_cmd
    pvr_kccb_send_cmd_powered
      pvr_kccb_reserve_slot_sync

pvr_kccb_reserve_slot_sync() even looks like it could transiently fail
without even once calling pvr_kccb_try_reserve_slot() if it gets
preempted until RESERVE_SLOT_TIMEOUT time has passed between the first
and the second read of "jiffies". (Which is probably not very likely
to happen by chance, but Android devices are typically configured with
full kernel preemption, so if an attacker causes pvr_mmu_flush() to
run in a process with a SCHED_IDLE scheduling policy and deliberately
preempts the process at the right point, they might be able to achieve
this.) A more robust retry pattern might be to do a fixed number of
retry iterations with sleeps in between instead of retrying until a
fixed amount of time has passed; though I still wouldn't want to rely
on that for making sure that a TLB flush happens.
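
Roughly something like this instead (sketch only - I'm assuming
pvr_kccb_try_reserve_slot() returns whether it got a slot, and the
retry count and sleep range are made-up numbers):

	for (unsigned int i = 0; i < 10; i++) {
		if (pvr_kccb_try_reserve_slot(pvr_dev))
			return 0;
		usleep_range(100, 200);
	}
	return -EBUSY;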

In my opinion, any error path of pvr_mmu_flush() should guarantee that
by the time it returns, the address space is no longer used by the GPU
and can never be used by the GPU again.

[...]
> +/**
> + * struct pvr_mmu_op_context - context holding data for individual
> + * device-virtual mapping operations. Intended for use with a VM bind operation.
> + */
> +struct pvr_mmu_op_context {
> +       /** @mmu_ctx: The MMU context associated with the owning VM context. */
> +       struct pvr_mmu_context *mmu_ctx;
> +
> +       /** @map: Data specifically for map operations. */
> +       struct {
> +               /**
> +                * @sgt: Scatter gather table containing pages pinned for use by
> +                * this context - these are currently pinned when initialising
> +                * the VM bind operation.
> +                */
> +               struct sg_table *sgt;
> +
> +               /** @sgt_offset: Start address of the device-virtual mapping. */
> +               u64 sgt_offset;
> +       } map;
> +
> +       /**
> +        * @l1_free_tables: Preallocated l1 page table objects for use by this
> +        * context when creating a page mapping. Linked list created during
> +        * initialisation. Also used to collect page table objects freed by an
> +        * unmap.
> +        */
> +       struct pvr_page_table_l1 *l1_free_tables;
> +
> +       /**
> +        * @l0_free_tables: Preallocated l0 page table objects for use by this
> +        * context when creating a page mapping. Linked list created during
> +        * initialisation. Also used to collect page table objects freed by an
> +        * unmap.
> +        */
> +       struct pvr_page_table_l0 *l0_free_tables;

The free page table lists are shared between page table allocation and
freeing within one operation, and they have last-in-first-out (stack)
behavior, which means that when a pvr_vm_map() invocation does
pvr_vm_gpuva_unmap() invocations followed by pvr_vm_gpuva_map()
invocations, it can end up immediately reusing the freed page tables
within the same operation, right?
Since pvr_mmu_flush() only happens in pvr_mmu_op_context_destroy() at
the end of pvr_vm_map(), that means a concurrent GPU page table walk
could walk down to the old address where a page table used to be
mapped, and then observe the page table entries that were created at
the new address to which the page table was moved? Like:


GPU         AP
===         ==
load L2 PTE for VA A
load L1 PTE for VA A, it contains a reference to L0 table T1
            pvr_page_table_l1_remove() removes L0 table T1 for VA A
            pvr_page_table_l1_remove() removes L0 table T2 for VA B
            pvr_page_table_l1_insert() inserts L0 table T2 at VA A
            pvr_page_table_l1_insert() inserts L0 table T1 at VA B
            pvr_page_table_l0_insert() inserts PTE into T1 for VA B
load L0 PTE for VA A from L0 table T1


And since page tables also don't seem to be cache-flushed when they
are put on these freelists, this could maybe also happen the other way
around: The GPU walks down into a page table that was moved to a new
address, but observes PTEs from the old address at which the page
table was previously mapped?
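
One way to close both windows would be to quarantine page tables freed by an
unmap within the current operation and only hand them back for reuse once the
cache sync and MMU flush have happened. A sketch of what I mean (the pvr_*
types and pvr_page_table_l0_free() are from the patch; the retired list, its
link field and the helpers below are invented for illustration, and an
equivalent pair would be needed for the l1 tables):

	/*
	 * New field in struct pvr_mmu_op_context: tables freed by an unmap in
	 * this op. Unlike l0_free_tables, these must not be handed out again
	 * before pvr_mmu_flush() has completed.
	 */
	struct pvr_page_table_l0 *l0_retired_tables;

static void
pvr_mmu_op_context_retire_l0(struct pvr_mmu_op_context *op_ctx,
			     struct pvr_page_table_l0 *table)
{
	/* Reuse whatever link field the existing freelist already uses. */
	table->next_free = op_ctx->l0_retired_tables;
	op_ctx->l0_retired_tables = table;
}

/*
 * Called from pvr_mmu_op_context_destroy() only after the final cache sync
 * and pvr_mmu_flush(), so the GPU can no longer walk the old locations of
 * these tables.
 */
static void
pvr_mmu_op_context_release_retired(struct pvr_mmu_op_context *op_ctx)
{
	while (op_ctx->l0_retired_tables) {
		struct pvr_page_table_l0 *table = op_ctx->l0_retired_tables;

		op_ctx->l0_retired_tables = table->next_free;
		pvr_page_table_l0_free(table);
	}
}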

> +
> +       /**
> +        * @curr_page - A reference to a single physical page as indexed by
> +        * the page table structure.
> +        */
> +       struct pvr_page_table_ptr curr_page;
> +
> +       /**
> +        * @sync_level_required: The maximum level of the page table tree
> +        * structure which has (possibly) been modified since it was last
> +        * flushed to the device.
> +        *
> +        * This field should only be set with pvr_mmu_op_context_require_sync()
> +        * or indirectly by pvr_mmu_op_context_sync_partial().
> +        */
> +       enum pvr_mmu_sync_level sync_level_required;
> +};
[...]
> +/**
> + * pvr_mmu_unmap() - Unmap pages from a memory context.
> + * @op_ctx: Target MMU op context.
> + * @device_addr: First device-virtual address to unmap.
> + * @size: Size in bytes to unmap.
> + *
> + * The total amount of device-virtual memory unmapped is
> + * @nr_pages * %PVR_DEVICE_PAGE_SIZE.
> + *
> + * Returns:
> + *  * 0 on success, or
> + *  * Any error code returned by pvr_page_table_ptr_init(), or
> + *  * Any error code returned by pvr_page_table_ptr_unmap().
> + */
> +int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
> +{
> +       int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
> +
> +       if (err)
> +               return err;
> +
> +       return pvr_mmu_op_context_unmap_curr_page(op_ctx,
> +                                                 size >> PVR_DEVICE_PAGE_SHIFT);
> +}

I think we can get here in the middle of this call path:

  pvr_ioctl_vm_unmap
    pvr_vm_unmap
      pvr_mmu_op_context_create
      mutex_lock(&vm_ctx->lock);
      drm_gpuva_sm_unmap
        __drm_gpuva_sm_unmap
          loop:
            op_unmap_cb [conditional]
              pvr_vm_gpuva_unmap
                pvr_mmu_unmap [WE ARE HERE]
                  pvr_mmu_op_context_set_curr_page
                    pvr_mmu_op_context_sync [CACHE FLUSH]
                    pvr_mmu_op_context_load_tables
                  pvr_mmu_op_context_unmap_curr_page
                    pvr_page_destroy [conditional]
                    loop:
                      pvr_mmu_op_context_next_page
                        pvr_mmu_op_context_sync_partial [CACHE FLUSH]
                        pvr_mmu_op_context_load_tables
                      pvr_page_destroy
                        pvr_page_table_l0_remove [REMOVES PTE]
                          pvr_page_table_l1_remove [conditional]
                        pvr_mmu_op_context_require_sync
                drm_gpuva_unmap
                drm_gpuva_unlink
                kfree(op->unmap.va)
                pvr_gem_object_put [FREES PAGES]
      mutex_unlock(&vm_ctx->lock);
      pvr_mmu_op_context_destroy
        pvr_mmu_op_context_sync [CACHE FLUSH]
        pvr_mmu_flush [FLUSHES MMU]
        loop:
          pvr_page_table_l0_free
        loop:
          pvr_page_table_l1_free

From what I can tell, we can get from `pvr_page_table_l0_remove`
(where the GPU PTE is cleared) to `pvr_gem_object_put` (where the page
referenced by the PTE can be freed) without going through a page table
cache flush or an MMU flush; I think we need both (unless the
pvr_gem_object_put() is somehow deferred until
pvr_mmu_op_context_destroy() is reached).
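
A rough sketch of such a deferral (the op context type is from the patch; the
xarray field and the two helpers below are invented for illustration):

	/*
	 * New field in struct pvr_vm_gpuva_op_ctx, initialised with
	 * xa_init_flags(&op_ctx.deferred_puts, XA_FLAGS_ALLOC).
	 */
	struct xarray deferred_puts;

/* Used instead of pvr_gem_object_put() in pvr_vm_gpuva_unmap()/_remap(). */
static int pvr_vm_defer_gem_put(struct pvr_vm_gpuva_op_ctx *ctx,
				struct pvr_gem_object *pvr_gem)
{
	u32 id;

	return xa_alloc(&ctx->deferred_puts, &id, pvr_gem, xa_limit_32b,
			GFP_KERNEL);
}

/*
 * Run only after pvr_mmu_op_context_destroy(), i.e. after the page table
 * caches have been synced and the MMU flushed, so the backing pages cannot
 * be freed while stale translations may still reference them.
 */
static void pvr_vm_release_deferred_puts(struct pvr_vm_gpuva_op_ctx *ctx)
{
	struct pvr_gem_object *pvr_gem;
	unsigned long id;

	xa_for_each(&ctx->deferred_puts, id, pvr_gem)
		pvr_gem_object_put(pvr_gem);
	xa_destroy(&ctx->deferred_puts);
}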
Donald Robson Aug. 21, 2023, 8:30 a.m. UTC | #5
Hi Danilo,
Thanks for the feedback.  On the subject of locking, I have dma_resv locking
in another branch where I'm trying to enable bind queues, but I didn't think
I needed locking for the single, synchronous operations seen here.  Would a 
mutex on the gem object wrapper suffice?
Thanks,
Donald
On Fri, 2023-08-18 at 17:30 +0200, Danilo Krummrich wrote:
> 
> Hi Sarah,
> 
> On Wed, Aug 16, 2023 at 09:25:23AM +0100, Sarah Walker wrote:
> > Add a GEM implementation based on drm_gem_shmem, and support code for the
> > PowerVR GPU MMU. The GPU VA manager is used for address space management.
> > 
> > Changes since v4:
> > - Correct sync function in vmap/vunmap function documentation
> > - Update for upstream GPU VA manager
> > - Fix missing frees when unmapping drm_gpuva objects
> > - Always zero GEM BOs on creation
> > 
> > Changes since v3:
> > - Split MMU and VM code
> > - Register page table allocations with kmemleak
> > - Use drm_dev_{enter,exit}
> > 
> > Changes since v2:
> > - Use GPU VA manager
> > - Use drm_gem_shmem
> > 
> > Co-developed-by: Matt Coster <matt.coster@imgtec.com>
> > Signed-off-by: Matt Coster <matt.coster@imgtec.com>
> > Co-developed-by: Donald Robson <donald.robson@imgtec.com>
> > Signed-off-by: Donald Robson <donald.robson@imgtec.com>
> > Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
> > ---
> >  drivers/gpu/drm/imagination/Makefile     |    5 +-
> >  drivers/gpu/drm/imagination/pvr_device.c |   23 +-
> >  drivers/gpu/drm/imagination/pvr_device.h |   18 +
> >  drivers/gpu/drm/imagination/pvr_drv.c    |  302 ++-
> >  drivers/gpu/drm/imagination/pvr_gem.c    |  396 ++++
> >  drivers/gpu/drm/imagination/pvr_gem.h    |  177 ++
> >  drivers/gpu/drm/imagination/pvr_mmu.c    | 2487 ++++++++++++++++++++++
> >  drivers/gpu/drm/imagination/pvr_mmu.h    |  108 +
> >  drivers/gpu/drm/imagination/pvr_vm.c     |  890 ++++++++
> >  drivers/gpu/drm/imagination/pvr_vm.h     |   60 +
> >  10 files changed, 4455 insertions(+), 11 deletions(-)
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c
> >  create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h
> 
> <snip>
> 
> > diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
> > new file mode 100644
> > index 000000000000..616fad3a3325
> > --- /dev/null
> > +++ b/drivers/gpu/drm/imagination/pvr_vm.c
> > @@ -0,0 +1,890 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> > +
> > +#include "pvr_vm.h"
> > +
> > +#include "pvr_device.h"
> > +#include "pvr_drv.h"
> > +#include "pvr_gem.h"
> > +#include "pvr_mmu.h"
> > +#include "pvr_rogue_fwif.h"
> > +#include "pvr_rogue_heap_config.h"
> > +
> > +#include <drm/drm_gem.h>
> > +#include <drm/drm_gpuva_mgr.h>
> > +
> > +#include <linux/container_of.h>
> > +#include <linux/err.h>
> > +#include <linux/errno.h>
> > +#include <linux/gfp_types.h>
> > +#include <linux/kref.h>
> > +#include <linux/mutex.h>
> > +#include <linux/stddef.h>
> > +
> > +/**
> > + * DOC: Memory context
> > + *
> > + * This is the "top level" datatype in the VM code. It's exposed in the public
> > + * API as an opaque handle.
> > + */
> > +
> > +/**
> > + * struct pvr_vm_context - Context type which encapsulates an entire page table
> > + * tree structure.
> > + * @pvr_dev: The PowerVR device to which this context is bound.
> > + *
> > + * This binding is immutable for the life of the context.
> > + * @mmu_ctx: The context for binding to physical memory.
> > + * @gpuva_mgr: GPUVA manager object associated with this context.
> > + * @lock: Global lock on this entire structure of page tables.
> > + * @fw_mem_ctx_obj: Firmware object representing firmware memory context.
> > + * @ref_count: Reference count of object.
> > + */
> > +struct pvr_vm_context {
> > +	struct pvr_device *pvr_dev;
> > +	struct pvr_mmu_context *mmu_ctx;
> > +	struct drm_gpuva_manager gpuva_mgr;
> > +	struct mutex lock;
> > +	struct pvr_fw_object *fw_mem_ctx_obj;
> > +	struct kref ref_count;
> > +};
> > +
> > +/**
> > + * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
> > + *                                     page table structure behind a VM context.
> > + * @vm_ctx: Target VM context.
> > + */
> > +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
> > +{
> > +	return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
> > +}
> > +
> > +/**
> > + * DOC: Memory mappings
> > + */
> > +
> > +/**
> > + * pvr_vm_gpuva_mapping_init() - Setup a mapping object with the specified
> > + * parameters ready for mapping using pvr_vm_gpuva_mapping_map().
> > + * @va: Pointer to drm_gpuva mapping object.
> > + * @device_addr: Device-virtual address at the start of the mapping.
> > + * @size: Size of the desired mapping.
> > + * @pvr_obj: Target PowerVR memory object.
> > + * @pvr_obj_offset: Offset into @pvr_obj to begin mapping from.
> > + *
> > + * Some parameters of this function are unchecked. It is therefore the callers
> > + * responsibility to ensure certain constraints are met. Specifically:
> > + *
> > + * * @pvr_obj_offset must be less than the size of @pvr_obj,
> > + * * The sum of @pvr_obj_offset and @size must be less than or equal to the
> > + *   size of @pvr_obj,
> > + * * The range specified by @pvr_obj_offset and @size (the "CPU range") must be
> > + *   CPU page-aligned both in start position and size, and
> > + * * The range specified by @device_addr and @size (the "device range") must be
> > + *   device page-aligned both in start position and size.
> > + *
> > + * Furthermore, it is up to the caller to make sure that a reference to @pvr_obj
> > + * is taken prior to mapping @va with the drm_gpuva_manager.
> > + */
> > +static void
> > +pvr_vm_gpuva_mapping_init(struct drm_gpuva *va, u64 device_addr, u64 size,
> > +			  struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset)
> 
> There's already drm_gpuva_init() doing the same thing.
> 
> > +{
> > +	va->va.addr = device_addr;
> > +	va->va.range = size;
> > +	va->gem.obj = gem_from_pvr_gem(pvr_obj);
> > +	va->gem.offset = pvr_obj_offset;
> > +}
> > +
> > +struct pvr_vm_gpuva_op_ctx {
> > +	struct pvr_vm_context *vm_ctx;
> > +	struct pvr_mmu_op_context *mmu_op_ctx;
> > +	struct drm_gpuva *new_va, *prev_va, *next_va;
> > +};
> > +
> > +/**
> > + * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
> > + * @op: gpuva op containing the remap details.
> > + * @op_ctx: Operation context.
> > + *
> > + * Context: Called by drm_gpuva_sm_map following a successful mapping while
> > + * @op_ctx.vm_ctx mutex is held.
> > + *
> > + * Return:
> > + *  * 0 on success, or
> > + *  * Any error returned by pvr_mmu_map().
> > + */
> > +static int
> > +pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
> > +{
> > +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
> > +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> > +	int err;
> > +
> > +	if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
> > +		return -EINVAL;
> > +
> > +	err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
> > +			  op->map.va.addr);
> > +	if (err)
> > +		return err;
> > +
> > +	pvr_vm_gpuva_mapping_init(ctx->new_va, op->map.va.addr,
> > +				  op->map.va.range, pvr_gem, op->map.gem.offset);
> > +
> > +	drm_gpuva_map(&ctx->vm_ctx->gpuva_mgr, ctx->new_va, &op->map);
> 
> drm_gpuva_map() does use drm_gpuva_init_from_op() internally, hence the extra
> call to pvr_vm_gpuva_mapping_init() should be unnecessary.
> 
> > +	drm_gpuva_link(ctx->new_va);
> 
> How is this protected?
> 
> drm_gpuva_link() and drm_gpuva_unlink() require either the dma_resv lock of the
> corresponding GEM object being held or, alternatively, the driver specific lock
> indicated via drm_gem_gpuva_set_lock().
> 
> > +	ctx->new_va = NULL;
> > +
> > +	/*
> > +	 * Increment the refcount on the underlying physical memory resource
> > +	 * to prevent de-allocation while the mapping exists.
> > +	 */
> > +	pvr_gem_object_get(pvr_gem);
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
> > + * @op: gpuva op containing the unmap details.
> > + * @op_ctx: Operation context.
> > + *
> > + * Context: Called by drm_gpuva_sm_unmap following a successful unmapping while
> > + * @op_ctx.vm_ctx mutex is held.
> > + *
> > + * Return:
> > + *  * 0 on success, or
> > + *  * Any error returned by pvr_mmu_unmap().
> > + */
> > +static int
> > +pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
> > +{
> > +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->unmap.va->gem.obj);
> > +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> > +
> > +	int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
> > +				op->unmap.va->va.range);
> > +
> > +	if (err)
> > +		return err;
> > +
> > +	drm_gpuva_unmap(&op->unmap);
> > +	drm_gpuva_unlink(op->unmap.va);
> > +	kfree(op->unmap.va);
> > +
> > +	pvr_gem_object_put(pvr_gem);
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
> > + * @op: gpuva op containing the remap details.
> > + * @op_ctx: Operation context.
> > + *
> > + * Context: Called by either drm_gpuva_sm_map or drm_gpuva_sm_unmap when a
> > + * mapping or unmapping operation causes a region to be split. The
> > + * @op_ctx.vm_ctx mutex is held.
> > + *
> > + * Return:
> > + *  * 0 on success, or
> > + *  * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap().
> > + */
> > +static int
> > +pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
> > +{
> > +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
> > +
> > +	if (op->remap.unmap) {
> 
> You can omit this check, remap operations always contain a valid unmap
> operation. However, you might want to know whether the remap operation was
> generated due to a call to drm_gpuva_sm_map() or drm_gpuva_sm_unmap(), since for
> the latter you might want to free page table structures.
> 
> > +		const u64 va_start = op->remap.prev ?
> > +				     op->remap.prev->va.addr + op->remap.prev->va.range :
> > +				     op->remap.unmap->va->va.addr;
> > +		const u64 va_end = op->remap.next ?
> > +				   op->remap.next->va.addr :
> > +				   op->remap.unmap->va->va.addr + op->remap.unmap->va->va.range;
> 
> > This seems to be a common calculation for drivers; it is probably worth coming
> > up with a helper, something like
> drm_gpuva_op_unmap_range(struct drm_gpuva_op *op, u64 *addr, u64 *range).
> 
> > +
> > +		int err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start,
> > +					va_end - va_start);
> > +
> > +		if (err)
> > +			return err;
> > +	}
> > +
> > +	if (op->remap.prev)
> > +		pvr_vm_gpuva_mapping_init(ctx->prev_va, op->remap.prev->va.addr,
> > +					  op->remap.prev->va.range,
> > +					  gem_to_pvr_gem(op->remap.prev->gem.obj),
> > +					  op->remap.prev->gem.offset);
> > +
> > +	if (op->remap.next)
> > +		pvr_vm_gpuva_mapping_init(ctx->next_va, op->remap.next->va.addr,
> > +					  op->remap.next->va.range,
> > +					  gem_to_pvr_gem(op->remap.next->gem.obj),
> > +					  op->remap.next->gem.offset);
> > +
> > +	/* No actual remap required: the page table tree depth is fixed to 3,
> > +	 * and we use 4k page table entries only for now.
> > +	 */
> > +	drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap);
> 
> As above, drm_gpuva_remap() does use drm_gpuva_init_from_op() internally, hence
> the extra call to pvr_vm_gpuva_mapping_init() should be unnecessary.
> 
> > +
> > +	if (op->remap.prev) {
> > +		pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->gem.obj));
> > +		drm_gpuva_link(ctx->prev_va);
> > +		ctx->prev_va = NULL;
> > +	}
> > +
> > +	if (op->remap.next) {
> > +		pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->gem.obj));
> > +		drm_gpuva_link(ctx->next_va);
> > +		ctx->next_va = NULL;
> > +	}
> > +
> > +	if (op->remap.unmap) {
> 
> As above, no need for this check.
> 
> - Danilo
> 
> > +		struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->remap.unmap->va->gem.obj);
> > +
> > +		drm_gpuva_unlink(op->unmap.va);
> > +		kfree(op->unmap.va);
> > +
> > +		pvr_gem_object_put(pvr_gem);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Public API
> > + *
> > + * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
> > + */
> > +
> > +/**
> > + * pvr_device_addr_is_valid() - Tests whether a device-virtual address
> > + *                              is valid.
> > + * @device_addr: Virtual device address to test.
> > + *
> > + * Return:
> > + *  * %true if @device_addr is within the valid range for a device page
> > + *    table and is aligned to the device page size, or
> > + *  * %false otherwise.
> > + */
> > +bool
> > +pvr_device_addr_is_valid(u64 device_addr)
> > +{
> > +	return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
> > +	       (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
> > +}
> > +
> > +/**
> > + * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
> > + * address and associated size are both valid.
> > + * @device_addr: Virtual device address to test.
> > + * @size: Size of the range based at @device_addr to test.
> > + *
> > + * Calling pvr_device_addr_is_valid() twice (once on @size, and again on
> > + * @device_addr + @size) to verify a device-virtual address range initially
> > + * seems intuitive, but it produces a false-negative when the address range
> > + * is right at the end of device-virtual address space.
> > + *
> > + * This function catches that corner case, as well as checking that
> > + * @size is non-zero.
> > + *
> > + * Return:
> > + *  * %true if @device_addr is device page aligned; @size is device page
> > + *    aligned; the range specified by @device_addr and @size is within the
> > + *    bounds of the device-virtual address space, and @size is non-zero, or
> > + *  * %false otherwise.
> > + */
> > +bool
> > +pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
> > +{
> > +	return pvr_device_addr_is_valid(device_addr) &&
> > +	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
> > +	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
> > +}
> > +
> > +static const struct drm_gpuva_fn_ops pvr_vm_gpuva_ops = {
> > +	.sm_step_map = pvr_vm_gpuva_map,
> > +	.sm_step_remap = pvr_vm_gpuva_remap,
> > +	.sm_step_unmap = pvr_vm_gpuva_unmap,
> > +};
> > +
> > +/**
> > + * pvr_vm_create_context() - Create a new VM context.
> > + * @pvr_dev: Target PowerVR device.
> > + * @is_userspace_context: %true if this context is for userspace. This will
> > + *                        create a firmware memory context for the VM context
> > + *                        and disable warnings when tearing down mappings.
> > + *
> > + * Return:
> > + *  * A handle to the newly-minted VM context on success,
> > + *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
> > + *    missing or has an unsupported value,
> > + *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
> > + *    or
> > + *  * Any error encountered while setting up internal structures.
> > + */
> > +struct pvr_vm_context *
> > +pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
> > +{
> > +	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
> > +
> > +	struct pvr_vm_context *vm_ctx;
> > +	u16 device_addr_bits;
> > +
> > +	int err;
> > +
> > +	err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
> > +				&device_addr_bits);
> > +	if (err) {
> > +		drm_err(drm_dev,
> > +			"Failed to get device virtual address space bits\n");
> > +		return ERR_PTR(err);
> > +	}
> > +
> > +	if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
> > +		drm_err(drm_dev,
> > +			"Device has unsupported virtual address space size\n");
> > +		return ERR_PTR(-EINVAL);
> > +	}
> > +
> > +	vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
> > +	if (!vm_ctx)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	vm_ctx->pvr_dev = pvr_dev;
> > +	kref_init(&vm_ctx->ref_count);
> > +	mutex_init(&vm_ctx->lock);
> > +
> > +	drm_gpuva_manager_init(&vm_ctx->gpuva_mgr,
> > +			       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
> > +			       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
> > +
> > +	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
> > +	err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
> > +	if (err) {
> > +		vm_ctx->mmu_ctx = NULL;
> > +		goto err_put_ctx;
> > +	}
> > +
> > +	if (is_userspace_context) {
> > +		/* TODO: Create FW mem context */
> > +		err = -ENODEV;
> > +		goto err_put_ctx;
> > +	}
> > +
> > +	return vm_ctx;
> > +
> > +err_put_ctx:
> > +	pvr_vm_context_put(vm_ctx);
> > +
> > +	return ERR_PTR(err);
> > +}
> > +
> > +/**
> > + * pvr_vm_context_release() - Teardown a VM context.
> > + * @ref_count: Pointer to reference counter of the VM context.
> > + *
> > + * This function ensures that no mappings are left dangling by unmapping them
> > + * all in order of ascending device-virtual address.
> > + */
> > +static void
> > +pvr_vm_context_release(struct kref *ref_count)
> > +{
> > +	struct pvr_vm_context *vm_ctx =
> > +		container_of(ref_count, struct pvr_vm_context, ref_count);
> > +
> > +	/* TODO: Destroy FW mem context */
> > +	WARN_ON(vm_ctx->fw_mem_ctx_obj);
> > +
> > +	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuva_mgr.mm_start,
> > +			     vm_ctx->gpuva_mgr.mm_range));
> > +
> > +	drm_gpuva_manager_destroy(&vm_ctx->gpuva_mgr);
> > +	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
> > +	mutex_destroy(&vm_ctx->lock);
> > +
> > +	kfree(vm_ctx);
> > +}
> > +
> > +/**
> > + * pvr_vm_context_lookup() - Look up VM context from handle
> > + * @pvr_file: Pointer to pvr_file structure.
> > + * @handle: Object handle.
> > + *
> > + * Takes reference on VM context object. Call pvr_vm_context_put() to release.
> > + *
> > + * Returns:
> > + *  * The requested object on success, or
> > + *  * %NULL on failure (object does not exist in list, or is not a VM context)
> > + */
> > +struct pvr_vm_context *
> > +pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
> > +{
> > +	struct pvr_vm_context *vm_ctx;
> > +
> > +	xa_lock(&pvr_file->vm_ctx_handles);
> > +	vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
> > +	if (vm_ctx)
> > +		kref_get(&vm_ctx->ref_count);
> > +
> > +	xa_unlock(&pvr_file->vm_ctx_handles);
> > +
> > +	return vm_ctx;
> > +}
> > +
> > +/**
> > + * pvr_vm_context_put() - Release a reference on a VM context
> > + * @vm_ctx: Target VM context.
> > + *
> > + * Returns:
> > + *  * %true if the VM context was destroyed, or
> > + *  * %false if there are any references still remaining.
> > + */
> > +bool
> > +pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
> > +{
> > +	WARN_ON(!vm_ctx);
> > +
> > +	if (vm_ctx)
> > +		return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
> > +
> > +	return true;
> > +}
> > +
> > +/**
> > + * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the
> > + * given file.
> > + * @pvr_file: Pointer to pvr_file structure.
> > + *
> > + * Removes all vm_contexts associated with @pvr_file from the device VM context
> > + * list and drops initial references. vm_contexts will then be destroyed once
> > + * all outstanding references are dropped.
> > + */
> > +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
> > +{
> > +	struct pvr_vm_context *vm_ctx;
> > +	unsigned long handle;
> > +
> > +	xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
> > +		/* vm_ctx is not used here because that would create a race with xa_erase */
> > +		pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
> > +	}
> > +}
> > +
> > +/**
> > + * pvr_vm_map() - Map a section of physical memory into a section of device-virtual memory.
> > + * @vm_ctx: Target VM context.
> > + * @pvr_obj: Target PowerVR memory object.
> > + * @pvr_obj_offset: Offset into @pvr_obj to map from.
> > + * @device_addr: Virtual device address at the start of the requested mapping.
> > + * @size: Size of the requested mapping.
> > + *
> > + * No handle is returned to represent the mapping. Instead, callers should
> > + * remember @device_addr and use that as a handle.
> > + *
> > + * Return:
> > + *  * 0 on success,
> > + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
> > + *    address; the region specified by @pvr_obj_offset and @size does not fall
> > + *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
> > + *    is not device-virtual page-aligned,
> > + *  * Any error encountered while performing internal operations required to
> > + *    destroy the mapping (returned from pvr_vm_gpuva_map or
> > + *    pvr_vm_gpuva_remap).
> > + */
> > +int
> > +pvr_vm_map(struct pvr_vm_context *vm_ctx,
> > +	   struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
> > +	   u64 device_addr, u64 size)
> > +{
> > +	const size_t pvr_obj_size = pvr_gem_object_size(pvr_obj);
> > +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
> > +	struct sg_table *sgt;
> > +	int err;
> > +
> > +	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
> > +	    pvr_obj_offset & ~PAGE_MASK || size & ~PAGE_MASK ||
> > +	    pvr_obj_offset + size > pvr_obj_size ||
> > +	    pvr_obj_offset > pvr_obj_size) {
> > +		return -EINVAL;
> > +	}
> > +
> > +	op_ctx.new_va = kzalloc(sizeof(*op_ctx.new_va), GFP_KERNEL);
> > +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
> > +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
> > +	if (!op_ctx.new_va || !op_ctx.prev_va || !op_ctx.next_va) {
> > +		err = -ENOMEM;
> > +		goto out_free;
> > +	}
> > +
> > +	sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
> > +	err = PTR_ERR_OR_ZERO(sgt);
> > +	if (err)
> > +		goto out_free;
> > +
> > +	op_ctx.mmu_op_ctx = pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt,
> > +						      pvr_obj_offset, size);
> > +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
> > +	if (err) {
> > +		op_ctx.mmu_op_ctx = NULL;
> > +		goto out_mmu_op_ctx_destroy;
> > +	}
> > +
> > +	mutex_lock(&vm_ctx->lock);
> > +	err = drm_gpuva_sm_map(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size,
> > +			       gem_from_pvr_gem(pvr_obj), pvr_obj_offset);
> > +	mutex_unlock(&vm_ctx->lock);
> > +
> > +out_mmu_op_ctx_destroy:
> > +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
> > +
> > +out_free:
> > +	kfree(op_ctx.next_va);
> > +	kfree(op_ctx.prev_va);
> > +	kfree(op_ctx.new_va);
> > +
> > +	return err;
> > +}
> > +
> > +/**
> > + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
> > + * @vm_ctx: Target VM context.
> > + * @device_addr: Virtual device address at the start of the target mapping.
> > + * @size: Size of the target mapping.
> > + *
> > + * Return:
> > + *  * 0 on success,
> > + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
> > + *    address,
> > + *  * Any error encountered while performing internal operations required to
> > + *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
> > + *    pvr_vm_gpuva_remap).
> > + */
> > +int
> > +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
> > +{
> > +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
> > +	int err;
> > +
> > +	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
> > +		return -EINVAL;
> > +
> > +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
> > +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
> > +	if (!op_ctx.prev_va || !op_ctx.next_va) {
> > +		err = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	op_ctx.mmu_op_ctx =
> > +		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
> > +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
> > +	if (err) {
> > +		op_ctx.mmu_op_ctx = NULL;
> > +		goto out;
> > +	}
> > +
> > +	mutex_lock(&vm_ctx->lock);
> > +	err = drm_gpuva_sm_unmap(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size);
> > +	mutex_unlock(&vm_ctx->lock);
> > +
> > +out:
> > +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
> > +	kfree(op_ctx.next_va);
> > +	kfree(op_ctx.prev_va);
> > +
> > +	return err;
> > +}
> > +
> > +/*
> > + * Static data areas are determined by firmware.
> > + *
> > + * When adding a new static data area you will also need to update the reserved_size field for the
> > + * heap in pvr_heaps[].
> > + */
> > +static const struct drm_pvr_static_data_area static_data_areas[] = {
> > +	{
> > +		.area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
> > +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
> > +		.offset = 0,
> > +		.size = 128,
> > +	},
> > +	{
> > +		.area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
> > +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
> > +		.offset = 128,
> > +		.size = 1024,
> > +	},
> > +	{
> > +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
> > +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
> > +		.offset = 0,
> > +		.size = 128,
> > +	},
> > +	{
> > +		.area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
> > +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
> > +		.offset = 128,
> > +		.size = 128,
> > +	},
> > +	{
> > +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
> > +		.location_heap_id = DRM_PVR_HEAP_USC_CODE,
> > +		.offset = 0,
> > +		.size = 128,
> > +	},
> > +};
> > +
> > +#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
> > +
> > +/*
> > + * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
> > + * static data area for each heap.
> > + */
> > +static const struct drm_pvr_heap pvr_heaps[] = {
> > +	[DRM_PVR_HEAP_GENERAL] = {
> > +		.base = ROGUE_GENERAL_HEAP_BASE,
> > +		.size = ROGUE_GENERAL_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +	[DRM_PVR_HEAP_PDS_CODE_DATA] = {
> > +		.base = ROGUE_PDSCODEDATA_HEAP_BASE,
> > +		.size = ROGUE_PDSCODEDATA_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +	[DRM_PVR_HEAP_USC_CODE] = {
> > +		.base = ROGUE_USCCODE_HEAP_BASE,
> > +		.size = ROGUE_USCCODE_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +	[DRM_PVR_HEAP_RGNHDR] = {
> > +		.base = ROGUE_RGNHDR_HEAP_BASE,
> > +		.size = ROGUE_RGNHDR_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +	[DRM_PVR_HEAP_VIS_TEST] = {
> > +		.base = ROGUE_VISTEST_HEAP_BASE,
> > +		.size = ROGUE_VISTEST_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +	[DRM_PVR_HEAP_TRANSFER_FRAG] = {
> > +		.base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
> > +		.size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
> > +		.flags = 0,
> > +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
> > +	},
> > +};
> > +
> > +int
> > +pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
> > +			  struct drm_pvr_ioctl_dev_query_args *args)
> > +{
> > +	struct drm_pvr_dev_query_static_data_areas query = {0};
> > +	int err;
> > +
> > +	if (!args->pointer) {
> > +		args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
> > +		return 0;
> > +	}
> > +
> > +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	if (!query.static_data_areas.array) {
> > +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
> > +		query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
> > +		goto copy_out;
> > +	}
> > +
> > +	if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
> > +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
> > +
> > +	err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
> > +	if (err < 0)
> > +		return err;
> > +
> > +copy_out:
> > +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	args->size = sizeof(query);
> > +	return 0;
> > +}
> > +
> > +int
> > +pvr_heap_info_get(const struct pvr_device *pvr_dev,
> > +		  struct drm_pvr_ioctl_dev_query_args *args)
> > +{
> > +	struct drm_pvr_dev_query_heap_info query = {0};
> > +	u64 dest;
> > +	int err;
> > +
> > +	if (!args->pointer) {
> > +		args->size = sizeof(struct drm_pvr_dev_query_heap_info);
> > +		return 0;
> > +	}
> > +
> > +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	if (!query.heaps.array) {
> > +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
> > +		query.heaps.stride = sizeof(struct drm_pvr_heap);
> > +		goto copy_out;
> > +	}
> > +
> > +	if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
> > +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
> > +
> > +	/* Region header heap is only present if BRN63142 is present. */
> > +	dest = query.heaps.array;
> > +	for (size_t i = 0; i < query.heaps.count; i++) {
> > +		struct drm_pvr_heap heap = pvr_heaps[i];
> > +
> > +		if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
> > +			heap.size = 0;
> > +
> > +		err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
> > +		if (err < 0)
> > +			return err;
> > +
> > +		dest += query.heaps.stride;
> > +	}
> > +
> > +copy_out:
> > +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	args->size = sizeof(query);
> > +	return 0;
> > +}
> > +
> > +/**
> > + * pvr_heap_contains_range() - Determine if a given heap contains the specified
> > + *                             device-virtual address range.
> > + * @pvr_heap: Target heap.
> > + * @start: Inclusive start of the target range.
> > + * @end: Inclusive end of the target range.
> > + *
> > + * It is an error to call this function with values of @start and @end that do
> > + * not satisfy the condition @start <= @end.
> > + */
> > +static __always_inline bool
> > +pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
> > +{
> > +	return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
> > +}
> > +
> > +/**
> > + * pvr_find_heap_containing() - Find a heap which contains the specified
> > + *                              device-virtual address range.
> > + * @pvr_dev: Target PowerVR device.
> > + * @start: Start of the target range.
> > + * @size: Size of the target range.
> > + *
> > + * Return:
> > + *  * A pointer to a constant instance of struct drm_pvr_heap representing the
> > + *    heap containing the entire range specified by @start and @size on
> > + *    success, or
> > + *  * %NULL if no such heap exists.
> > + */
> > +const struct drm_pvr_heap *
> > +pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
> > +{
> > +	u64 end;
> > +
> > +	if (check_add_overflow(start, size - 1, &end))
> > +		return NULL;
> > +
> > +	/*
> > +	 * There are no guarantees about the order of address ranges in
> > +	 * &pvr_heaps, so iterate over the entire array for a heap whose
> > +	 * range completely encompasses the given range.
> > +	 */
> > +	for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
> > +		/* Filter heaps that present only with an associated quirk */
> > +		if (heap_id == DRM_PVR_HEAP_RGNHDR &&
> > +		    !PVR_HAS_QUIRK(pvr_dev, 63142)) {
> > +			continue;
> > +		}
> > +
> > +		if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
> > +			return &pvr_heaps[heap_id];
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +/**
> > + * pvr_vm_find_gem_object() - Look up a buffer object from a given
> > + *                            device-virtual address.
> > + * @vm_ctx: [IN] Target VM context.
> > + * @device_addr: [IN] Virtual device address at the start of the required
> > + *               object.
> > + * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
> > + *                     of the mapped region within the buffer object. May be
> > + *                     %NULL if this information is not required.
> > + * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
> > + *                   region. May be %NULL if this information is not required.
> > + *
> > + * If successful, a reference will be taken on the buffer object. The caller
> > + * must drop the reference with pvr_gem_object_put().
> > + *
> > + * Return:
> > + *  * The PowerVR buffer object mapped at @device_addr if one exists, or
> > + *  * %NULL otherwise.
> > + */
> > +struct pvr_gem_object *
> > +pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
> > +		       u64 *mapped_offset_out, u64 *mapped_size_out)
> > +{
> > +	struct pvr_gem_object *pvr_obj;
> > +	struct drm_gpuva *va;
> > +
> > +	mutex_lock(&vm_ctx->lock);
> > +
> > +	va = drm_gpuva_find_first(&vm_ctx->gpuva_mgr, device_addr, 1);
> > +	if (!va)
> > +		goto err_unlock;
> > +
> > +	pvr_obj = gem_to_pvr_gem(va->gem.obj);
> > +	pvr_gem_object_get(pvr_obj);
> > +
> > +	if (mapped_offset_out)
> > +		*mapped_offset_out = va->gem.offset;
> > +	if (mapped_size_out)
> > +		*mapped_size_out = va->va.range;
> > +
> > +	mutex_unlock(&vm_ctx->lock);
> > +
> > +	return pvr_obj;
> > +
> > +err_unlock:
> > +	mutex_unlock(&vm_ctx->lock);
> > +
> > +	return NULL;
> > +}
> > +
> > +/**
> > + * pvr_vm_get_fw_mem_context: Get object representing firmware memory context
> > + * @vm_ctx: Target VM context.
> > + *
> > + * Returns:
> > + *  * FW object representing firmware memory context, or
> > + *  * %NULL if this VM context does not have a firmware memory context.
> > + */
> > +struct pvr_fw_object *
> > +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
> > +{
> > +	return vm_ctx->fw_mem_ctx_obj;
> > +}
> > diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
> > new file mode 100644
> > index 000000000000..b98bc3981807
> > --- /dev/null
> > +++ b/drivers/gpu/drm/imagination/pvr_vm.h
> > @@ -0,0 +1,60 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> > +
> > +#ifndef PVR_VM_H
> > +#define PVR_VM_H
> > +
> > +#include "pvr_rogue_mmu_defs.h"
> > +
> > +#include <uapi/drm/pvr_drm.h>
> > +
> > +#include <linux/types.h>
> > +
> > +/* Forward declaration from "pvr_device.h" */
> > +struct pvr_device;
> > +struct pvr_file;
> > +
> > +/* Forward declaration from "pvr_gem.h" */
> > +struct pvr_gem_object;
> > +
> > +/* Forward declaration from "pvr_vm.c" */
> > +struct pvr_vm_context;
> > +
> > +/* Forward declaration from <uapi/drm/pvr_drm.h> */
> > +struct drm_pvr_ioctl_get_heap_info_args;
> > +
> > +/* Functions defined in pvr_vm.c */
> > +
> > +bool pvr_device_addr_is_valid(u64 device_addr);
> > +bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
> > +
> > +struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
> > +					     bool is_userspace_context);
> > +
> > +int pvr_vm_map(struct pvr_vm_context *vm_ctx,
> > +	       struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
> > +	       u64 device_addr, u64 size);
> > +int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
> > +
> > +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
> > +
> > +int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
> > +			      struct drm_pvr_ioctl_dev_query_args *args);
> > +int pvr_heap_info_get(const struct pvr_device *pvr_dev,
> > +		      struct drm_pvr_ioctl_dev_query_args *args);
> > +const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
> > +						    u64 addr, u64 size);
> > +
> > +struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
> > +					      u64 device_addr,
> > +					      u64 *mapped_offset_out,
> > +					      u64 *mapped_size_out);
> > +
> > +struct pvr_fw_object *
> > +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
> > +
> > +struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
> > +bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
> > +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
> > +
> > +#endif /* PVR_VM_H */
> > -- 
> > 2.41.0
> >
Danilo Krummrich Aug. 21, 2023, 11:05 a.m. UTC | #6
On 8/21/23 10:30, Donald Robson wrote:
> Hi Danilo,
> Thanks for the feedback.  On the subject of locking, I have dma_resv locking
> in another branch where I'm trying to enable bind queues, but I didn't think
> I needed locking for the single, synchronous operations seen here.  Would a
> mutex on the gem object wrapper suffice?

The reason you need to either acquire the dma-resv lock or a driver 
specific lock for drm_gpuva_link() and drm_gpuva_unlink() is that you 
probably iterate the linked drm_gpuvas somewhere else, typically from a
ttm_device_funcs.move callback to unmap mappings when their backing GEM 
objects are evicted. If you handle that a different way and never 
iterate linked drm_gpuvas, you can probably omit drm_gpuva_(un)link() 
entirely.
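
For the driver-specific lock alternative, a minimal sketch (assuming the
drm_gem_gpuva_set_lock() and drm_gem_for_each_gpuva() helpers that come with
the GPU VA manager; the pvr_* functions below are purely hypothetical):

/*
 * When a BO is first mapped into a VM context, register the lock that will
 * be held around drm_gpuva_link()/drm_gpuva_unlink() and around any walk of
 * the BO's gpuva list.
 */
static void pvr_gem_register_gpuva_lock(struct pvr_gem_object *pvr_obj,
					struct pvr_vm_context *vm_ctx)
{
	drm_gem_gpuva_set_lock(gem_from_pvr_gem(pvr_obj), &vm_ctx->lock);
}

/*
 * The kind of path that makes the links necessary in the first place:
 * walking every mapping of a BO, e.g. to unmap it on eviction. Must be
 * called with the lock registered above held; a teardown that unlinks
 * entries while walking would need a _safe iterator variant.
 */
static void pvr_gem_unmap_all(struct pvr_gem_object *pvr_obj)
{
	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
	struct drm_gpuva *va;

	drm_gem_for_each_gpuva(va, obj)
		pvr_vm_gpuva_evict(va); /* hypothetical per-mapping teardown */
}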

> Thanks,
> Donald
> On Fri, 2023-08-18 at 17:30 +0200, Danilo Krummrich wrote:
>>
>> Hi Sarah,
>>
>> On Wed, Aug 16, 2023 at 09:25:23AM +0100, Sarah Walker wrote:
>>> Add a GEM implementation based on drm_gem_shmem, and support code for the
>>> PowerVR GPU MMU. The GPU VA manager is used for address space management.
>>>
>>> Changes since v4:
>>> - Correct sync function in vmap/vunmap function documentation
>>> - Update for upstream GPU VA manager
>>> - Fix missing frees when unmapping drm_gpuva objects
>>> - Always zero GEM BOs on creation
>>>
>>> Changes since v3:
>>> - Split MMU and VM code
>>> - Register page table allocations with kmemleak
>>> - Use drm_dev_{enter,exit}
>>>
>>> Changes since v2:
>>> - Use GPU VA manager
>>> - Use drm_gem_shmem
>>>
>>> Co-developed-by: Matt Coster <matt.coster@imgtec.com>
>>> Signed-off-by: Matt Coster <matt.coster@imgtec.com>
>>> Co-developed-by: Donald Robson <donald.robson@imgtec.com>
>>> Signed-off-by: Donald Robson <donald.robson@imgtec.com>
>>> Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
>>> ---
>>>   drivers/gpu/drm/imagination/Makefile     |    5 +-
>>>   drivers/gpu/drm/imagination/pvr_device.c |   23 +-
>>>   drivers/gpu/drm/imagination/pvr_device.h |   18 +
>>>   drivers/gpu/drm/imagination/pvr_drv.c    |  302 ++-
>>>   drivers/gpu/drm/imagination/pvr_gem.c    |  396 ++++
>>>   drivers/gpu/drm/imagination/pvr_gem.h    |  177 ++
>>>   drivers/gpu/drm/imagination/pvr_mmu.c    | 2487 ++++++++++++++++++++++
>>>   drivers/gpu/drm/imagination/pvr_mmu.h    |  108 +
>>>   drivers/gpu/drm/imagination/pvr_vm.c     |  890 ++++++++
>>>   drivers/gpu/drm/imagination/pvr_vm.h     |   60 +
>>>   10 files changed, 4455 insertions(+), 11 deletions(-)
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c
>>>   create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h
>>
>> <snip>
>>
>>> diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
>>> new file mode 100644
>>> index 000000000000..616fad3a3325
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/imagination/pvr_vm.c
>>> @@ -0,0 +1,890 @@
>>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>>> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
>>> +
>>> +#include "pvr_vm.h"
>>> +
>>> +#include "pvr_device.h"
>>> +#include "pvr_drv.h"
>>> +#include "pvr_gem.h"
>>> +#include "pvr_mmu.h"
>>> +#include "pvr_rogue_fwif.h"
>>> +#include "pvr_rogue_heap_config.h"
>>> +
>>> +#include <drm/drm_gem.h>
>>> +#include <drm/drm_gpuva_mgr.h>
>>> +
>>> +#include <linux/container_of.h>
>>> +#include <linux/err.h>
>>> +#include <linux/errno.h>
>>> +#include <linux/gfp_types.h>
>>> +#include <linux/kref.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/stddef.h>
>>> +
>>> +/**
>>> + * DOC: Memory context
>>> + *
>>> + * This is the "top level" datatype in the VM code. It's exposed in the public
>>> + * API as an opaque handle.
>>> + */
>>> +
>>> +/**
>>> + * struct pvr_vm_context - Context type which encapsulates an entire page table
>>> + * tree structure.
>>> + * @pvr_dev: The PowerVR device to which this context is bound.
>>> + *
>>> + * This binding is immutable for the life of the context.
>>> + * @mmu_ctx: The context for binding to physical memory.
>>> + * @gpuva_mgr: GPUVA manager object associated with this context.
>>> + * @lock: Global lock on this entire structure of page tables.
>>> + * @fw_mem_ctx_obj: Firmware object representing firmware memory context.
>>> + * @ref_count: Reference count of object.
>>> + */
>>> +struct pvr_vm_context {
>>> +	struct pvr_device *pvr_dev;
>>> +	struct pvr_mmu_context *mmu_ctx;
>>> +	struct drm_gpuva_manager gpuva_mgr;
>>> +	struct mutex lock;
>>> +	struct pvr_fw_object *fw_mem_ctx_obj;
>>> +	struct kref ref_count;
>>> +};
>>> +
>>> +/**
>>> + * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
>>> + *                                     page table structure behind a VM context.
>>> + * @vm_ctx: Target VM context.
>>> + */
>>> +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
>>> +{
>>> +	return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
>>> +}
>>> +
>>> +/**
>>> + * DOC: Memory mappings
>>> + */
>>> +
>>> +/**
>>> + * pvr_vm_gpuva_mapping_init() - Setup a mapping object with the specified
>>> + * parameters ready for mapping using pvr_vm_gpuva_mapping_map().
>>> + * @va: Pointer to drm_gpuva mapping object.
>>> + * @device_addr: Device-virtual address at the start of the mapping.
>>> + * @size: Size of the desired mapping.
>>> + * @pvr_obj: Target PowerVR memory object.
>>> + * @pvr_obj_offset: Offset into @pvr_obj to begin mapping from.
>>> + *
>>> + * Some parameters of this function are unchecked. It is therefore the callers
>>> + * responsibility to ensure certain constraints are met. Specifically:
>>> + *
>>> + * * @pvr_obj_offset must be less than the size of @pvr_obj,
>>> + * * The sum of @pvr_obj_offset and @size must be less than or equal to the
>>> + *   size of @pvr_obj,
>>> + * * The range specified by @pvr_obj_offset and @size (the "CPU range") must be
>>> + *   CPU page-aligned both in start position and size, and
>>> + * * The range specified by @device_addr and @size (the "device range") must be
>>> + *   device page-aligned both in start position and size.
>>> + *
>>> + * Furthermore, it is up to the caller to make sure that a reference to @pvr_obj
>>> + * is taken prior to mapping @va with the drm_gpuva_manager.
>>> + */
>>> +static void
>>> +pvr_vm_gpuva_mapping_init(struct drm_gpuva *va, u64 device_addr, u64 size,
>>> +			  struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset)
>>
>> There's already drm_gpuva_init() doing the same thing.
>>
>>> +{
>>> +	va->va.addr = device_addr;
>>> +	va->va.range = size;
>>> +	va->gem.obj = gem_from_pvr_gem(pvr_obj);
>>> +	va->gem.offset = pvr_obj_offset;
>>> +}
>>> +
>>> +struct pvr_vm_gpuva_op_ctx {
>>> +	struct pvr_vm_context *vm_ctx;
>>> +	struct pvr_mmu_op_context *mmu_op_ctx;
>>> +	struct drm_gpuva *new_va, *prev_va, *next_va;
>>> +};
>>> +
>>> +/**
>>> + * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
>>> + * @op: gpuva op containing the remap details.
>>> + * @op_ctx: Operation context.
>>> + *
>>> + * Context: Called by drm_gpuva_sm_map following a successful mapping while
>>> + * @op_ctx.vm_ctx mutex is held.
>>> + *
>>> + * Return:
>>> + *  * 0 on success, or
>>> + *  * Any error returned by pvr_mmu_map().
>>> + */
>>> +static int
>>> +pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
>>> +{
>>> +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
>>> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
>>> +	int err;
>>> +
>>> +	if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
>>> +		return -EINVAL;
>>> +
>>> +	err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
>>> +			  op->map.va.addr);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	pvr_vm_gpuva_mapping_init(ctx->new_va, op->map.va.addr,
>>> +				  op->map.va.range, pvr_gem, op->map.gem.offset);
>>> +
>>> +	drm_gpuva_map(&ctx->vm_ctx->gpuva_mgr, ctx->new_va, &op->map);
>>
>> drm_gpuva_map() does use drm_gpuva_init_from_op() internally, hence the extra
>> call to pvr_vm_gpuva_mapping_init() should be unnecessary.
>>
>>> +	drm_gpuva_link(ctx->new_va);
>>
>> How is this protected?
>>
>> drm_gpuva_link() and drm_gpuva_unlink() require either the dma_resv lock of the
>> corresponding GEM object being held or, alternatively, the driver specific lock
>> indicated via drm_gem_gpuva_set_lock().
>>
>>> +	ctx->new_va = NULL;
>>> +
>>> +	/*
>>> +	 * Increment the refcount on the underlying physical memory resource
>>> +	 * to prevent de-allocation while the mapping exists.
>>> +	 */
>>> +	pvr_gem_object_get(pvr_gem);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
>>> + * @op: gpuva op containing the unmap details.
>>> + * @op_ctx: Operation context.
>>> + *
>>> + * Context: Called by drm_gpuva_sm_unmap following a successful unmapping while
>>> + * @op_ctx.vm_ctx mutex is held.
>>> + *
>>> + * Return:
>>> + *  * 0 on success, or
>>> + *  * Any error returned by pvr_mmu_unmap().
>>> + */
>>> +static int
>>> +pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
>>> +{
>>> +	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->unmap.va->gem.obj);
>>> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
>>> +
>>> +	int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
>>> +				op->unmap.va->va.range);
>>> +
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	drm_gpuva_unmap(&op->unmap);
>>> +	drm_gpuva_unlink(op->unmap.va);
>>> +	kfree(op->unmap.va);
>>> +
>>> +	pvr_gem_object_put(pvr_gem);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
>>> + * @op: gpuva op containing the remap details.
>>> + * @op_ctx: Operation context.
>>> + *
>>> + * Context: Called by either drm_gpuva_sm_map or drm_gpuva_sm_unmap when a
>>> + * mapping or unmapping operation causes a region to be split. The
>>> + * @op_ctx.vm_ctx mutex is held.
>>> + *
>>> + * Return:
>>> + *  * 0 on success, or
>>> + *  * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap().
>>> + */
>>> +static int
>>> +pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
>>> +{
>>> +	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
>>> +
>>> +	if (op->remap.unmap) {
>>
>> You can omit this check, remap operations always contain a valid unmap
>> operation. However, you might want to know whether the remap operation was
>> generated due to a call to drm_gpuva_sm_map() or drm_gpuva_sm_unmap(), since for
>> the latter you might want to free page table structures.
>>
>>> +		const u64 va_start = op->remap.prev ?
>>> +				     op->remap.prev->va.addr + op->remap.prev->va.range :
>>> +				     op->remap.unmap->va->va.addr;
>>> +		const u64 va_end = op->remap.next ?
>>> +				   op->remap.next->va.addr :
>>> +				   op->remap.unmap->va->va.addr + op->remap.unmap->va->va.range;
>>
>> This seems to be a common calculation for drivers; it is probably worth coming
>> up with a helper, something like
>> drm_gpuva_op_unmap_range(struct drm_gpuva_op *op, u64 *addr, u64 *range).
>>
>>> +
>>> +		int err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start,
>>> +					va_end - va_start);
>>> +
>>> +		if (err)
>>> +			return err;
>>> +	}
>>> +
>>> +	if (op->remap.prev)
>>> +		pvr_vm_gpuva_mapping_init(ctx->prev_va, op->remap.prev->va.addr,
>>> +					  op->remap.prev->va.range,
>>> +					  gem_to_pvr_gem(op->remap.prev->gem.obj),
>>> +					  op->remap.prev->gem.offset);
>>> +
>>> +	if (op->remap.next)
>>> +		pvr_vm_gpuva_mapping_init(ctx->next_va, op->remap.next->va.addr,
>>> +					  op->remap.next->va.range,
>>> +					  gem_to_pvr_gem(op->remap.next->gem.obj),
>>> +					  op->remap.next->gem.offset);
>>> +
>>> +	/* No actual remap required: the page table tree depth is fixed to 3,
>>> +	 * and we use 4k page table entries only for now.
>>> +	 */
>>> +	drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap);
>>
>> As above, drm_gpuva_remap() does use drm_gpuva_init_from_op() internally, hence
>> the extra call to pvr_vm_gpuva_mapping_init() should be unnecessary.
>>
>>> +
>>> +	if (op->remap.prev) {
>>> +		pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->gem.obj));
>>> +		drm_gpuva_link(ctx->prev_va);
>>> +		ctx->prev_va = NULL;
>>> +	}
>>> +
>>> +	if (op->remap.next) {
>>> +		pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->gem.obj));
>>> +		drm_gpuva_link(ctx->next_va);
>>> +		ctx->next_va = NULL;
>>> +	}
>>> +
>>> +	if (op->remap.unmap) {
>>
>> As above, no need for this check.
>>
>> - Danilo
>>
>>> +		struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->remap.unmap->va->gem.obj);
>>> +
>>> +		drm_gpuva_unlink(op->unmap.va);
>>> +		kfree(op->unmap.va);
>>> +
>>> +		pvr_gem_object_put(pvr_gem);
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +/*
>>> + * Public API
>>> + *
>>> + * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
>>> + */
>>> +
>>> +/**
>>> + * pvr_device_addr_is_valid() - Tests whether a device-virtual address
>>> + *                              is valid.
>>> + * @device_addr: Virtual device address to test.
>>> + *
>>> + * Return:
>>> + *  * %true if @device_addr is within the valid range for a device page
>>> + *    table and is aligned to the device page size, or
>>> + *  * %false otherwise.
>>> + */
>>> +bool
>>> +pvr_device_addr_is_valid(u64 device_addr)
>>> +{
>>> +	return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
>>> +	       (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
>>> +}
>>> +
>>> +/**
>>> + * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
>>> + * address and associated size are both valid.
>>> + * @device_addr: Virtual device address to test.
>>> + * @size: Size of the range based at @device_addr to test.
>>> + *
>>> + * Calling pvr_device_addr_is_valid() twice (once on @size, and again on
>>> + * @device_addr + @size) to verify a device-virtual address range initially
>>> + * seems intuitive, but it produces a false-negative when the address range
>>> + * is right at the end of device-virtual address space.
>>> + *
>>> + * This function catches that corner case, as well as checking that
>>> + * @size is non-zero.
>>> + *
>>> + * Return:
>>> + *  * %true if @device_addr is device page aligned; @size is device page
>>> + *    aligned; the range specified by @device_addr and @size is within the
>>> + *    bounds of the device-virtual address space, and @size is non-zero, or
>>> + *  * %false otherwise.
>>> + */
>>> +bool
>>> +pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
>>> +{
>>> +	return pvr_device_addr_is_valid(device_addr) &&
>>> +	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
>>> +	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
>>> +}
>>> +
>>> +static const struct drm_gpuva_fn_ops pvr_vm_gpuva_ops = {
>>> +	.sm_step_map = pvr_vm_gpuva_map,
>>> +	.sm_step_remap = pvr_vm_gpuva_remap,
>>> +	.sm_step_unmap = pvr_vm_gpuva_unmap,
>>> +};
>>> +
>>> +/**
>>> + * pvr_vm_create_context() - Create a new VM context.
>>> + * @pvr_dev: Target PowerVR device.
>>> + * @is_userspace_context: %true if this context is for userspace. This will
>>> + *                        create a firmware memory context for the VM context
>>> + *                        and disable warnings when tearing down mappings.
>>> + *
>>> + * Return:
>>> + *  * A handle to the newly-minted VM context on success,
>>> + *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
>>> + *    missing or has an unsupported value,
>>> + *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
>>> + *    or
>>> + *  * Any error encountered while setting up internal structures.
>>> + */
>>> +struct pvr_vm_context *
>>> +pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
>>> +{
>>> +	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
>>> +
>>> +	struct pvr_vm_context *vm_ctx;
>>> +	u16 device_addr_bits;
>>> +
>>> +	int err;
>>> +
>>> +	err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
>>> +				&device_addr_bits);
>>> +	if (err) {
>>> +		drm_err(drm_dev,
>>> +			"Failed to get device virtual address space bits\n");
>>> +		return ERR_PTR(err);
>>> +	}
>>> +
>>> +	if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
>>> +		drm_err(drm_dev,
>>> +			"Device has unsupported virtual address space size\n");
>>> +		return ERR_PTR(-EINVAL);
>>> +	}
>>> +
>>> +	vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
>>> +	if (!vm_ctx)
>>> +		return ERR_PTR(-ENOMEM);
>>> +
>>> +	vm_ctx->pvr_dev = pvr_dev;
>>> +	kref_init(&vm_ctx->ref_count);
>>> +	mutex_init(&vm_ctx->lock);
>>> +
>>> +	drm_gpuva_manager_init(&vm_ctx->gpuva_mgr,
>>> +			       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
>>> +			       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
>>> +
>>> +	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
>>> +	err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx);
>>> +	if (err) {
>>> +		vm_ctx->mmu_ctx = NULL;
>>> +		goto err_put_ctx;
>>> +	}
>>> +
>>> +	if (is_userspace_context) {
>>> +		/* TODO: Create FW mem context */
>>> +		err = -ENODEV;
>>> +		goto err_put_ctx;
>>> +	}
>>> +
>>> +	return vm_ctx;
>>> +
>>> +err_put_ctx:
>>> +	pvr_vm_context_put(vm_ctx);
>>> +
>>> +	return ERR_PTR(err);
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_context_release() - Teardown a VM context.
>>> + * @ref_count: Pointer to reference counter of the VM context.
>>> + *
>>> + * This function ensures that no mappings are left dangling by unmapping them
>>> + * all in order of ascending device-virtual address.
>>> + */
>>> +static void
>>> +pvr_vm_context_release(struct kref *ref_count)
>>> +{
>>> +	struct pvr_vm_context *vm_ctx =
>>> +		container_of(ref_count, struct pvr_vm_context, ref_count);
>>> +
>>> +	/* TODO: Destroy FW mem context */
>>> +	WARN_ON(vm_ctx->fw_mem_ctx_obj);
>>> +
>>> +	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuva_mgr.mm_start,
>>> +			     vm_ctx->gpuva_mgr.mm_range));
>>> +
>>> +	drm_gpuva_manager_destroy(&vm_ctx->gpuva_mgr);
>>> +	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
>>> +	mutex_destroy(&vm_ctx->lock);
>>> +
>>> +	kfree(vm_ctx);
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_context_lookup() - Look up VM context from handle
>>> + * @pvr_file: Pointer to pvr_file structure.
>>> + * @handle: Object handle.
>>> + *
>>> + * Takes reference on VM context object. Call pvr_vm_context_put() to release.
>>> + *
>>> + * Returns:
>>> + *  * The requested object on success, or
>>> + *  * %NULL on failure (object does not exist in list, or is not a VM context)
>>> + */
>>> +struct pvr_vm_context *
>>> +pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
>>> +{
>>> +	struct pvr_vm_context *vm_ctx;
>>> +
>>> +	xa_lock(&pvr_file->vm_ctx_handles);
>>> +	vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
>>> +	if (vm_ctx)
>>> +		kref_get(&vm_ctx->ref_count);
>>> +
>>> +	xa_unlock(&pvr_file->vm_ctx_handles);
>>> +
>>> +	return vm_ctx;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_context_put() - Release a reference on a VM context
>>> + * @vm_ctx: Target VM context.
>>> + *
>>> + * Returns:
>>> + *  * %true if the VM context was destroyed, or
>>> + *  * %false if there are any references still remaining.
>>> + */
>>> +bool
>>> +pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
>>> +{
>>> +	WARN_ON(!vm_ctx);
>>> +
>>> +	if (vm_ctx)
>>> +		return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
>>> +
>>> +	return true;
>>> +}
>>> +
>>> +/**
>>> + * pvr_destroy_vm_contexts_for_file() - Destroy any VM contexts associated with the
>>> + * given file.
>>> + * @pvr_file: Pointer to pvr_file structure.
>>> + *
>>> + * Removes all vm_contexts associated with @pvr_file from the device VM context
>>> + * list and drops initial references. vm_contexts will then be destroyed once
>>> + * all outstanding references are dropped.
>>> + */
>>> +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
>>> +{
>>> +	struct pvr_vm_context *vm_ctx;
>>> +	unsigned long handle;
>>> +
>>> +	xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
>>> +		/* vm_ctx is not used here because that would create a race with xa_erase */
>>> +		pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
>>> +	}
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_map() - Map a section of physical memory into a section of device-virtual memory.
>>> + * @vm_ctx: Target VM context.
>>> + * @pvr_obj: Target PowerVR memory object.
>>> + * @pvr_obj_offset: Offset into @pvr_obj to map from.
>>> + * @device_addr: Virtual device address at the start of the requested mapping.
>>> + * @size: Size of the requested mapping.
>>> + *
>>> + * No handle is returned to represent the mapping. Instead, callers should
>>> + * remember @device_addr and use that as a handle.
>>> + *
>>> + * Return:
>>> + *  * 0 on success,
>>> + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
>>> + *    address; the region specified by @pvr_obj_offset and @size does not fall
>>> + *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
>>> + *    is not device-virtual page-aligned,
>>> + *  * Any error encountered while performing internal operations required to
>>> + *    create the mapping (returned from pvr_vm_gpuva_map or
>>> + *    pvr_vm_gpuva_remap).
>>> + */
>>> +int
>>> +pvr_vm_map(struct pvr_vm_context *vm_ctx,
>>> +	   struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
>>> +	   u64 device_addr, u64 size)
>>> +{
>>> +	const size_t pvr_obj_size = pvr_gem_object_size(pvr_obj);
>>> +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
>>> +	struct sg_table *sgt;
>>> +	int err;
>>> +
>>> +	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
>>> +	    pvr_obj_offset & ~PAGE_MASK || size & ~PAGE_MASK ||
>>> +	    pvr_obj_offset + size > pvr_obj_size ||
>>> +	    pvr_obj_offset > pvr_obj_size) {
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	op_ctx.new_va = kzalloc(sizeof(*op_ctx.new_va), GFP_KERNEL);
>>> +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
>>> +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
>>> +	if (!op_ctx.new_va || !op_ctx.prev_va || !op_ctx.next_va) {
>>> +		err = -ENOMEM;
>>> +		goto out_free;
>>> +	}
>>> +
>>> +	sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
>>> +	err = PTR_ERR_OR_ZERO(sgt);
>>> +	if (err)
>>> +		goto out_free;
>>> +
>>> +	op_ctx.mmu_op_ctx = pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt,
>>> +						      pvr_obj_offset, size);
>>> +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
>>> +	if (err) {
>>> +		op_ctx.mmu_op_ctx = NULL;
>>> +		goto out_mmu_op_ctx_destroy;
>>> +	}
>>> +
>>> +	mutex_lock(&vm_ctx->lock);
>>> +	err = drm_gpuva_sm_map(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size,
>>> +			       gem_from_pvr_gem(pvr_obj), pvr_obj_offset);
>>> +	mutex_unlock(&vm_ctx->lock);
>>> +
>>> +out_mmu_op_ctx_destroy:
>>> +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
>>> +
>>> +out_free:
>>> +	kfree(op_ctx.next_va);
>>> +	kfree(op_ctx.prev_va);
>>> +	kfree(op_ctx.new_va);
>>> +
>>> +	return err;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
>>> + * @vm_ctx: Target VM context.
>>> + * @device_addr: Virtual device address at the start of the target mapping.
>>> + * @size: Size of the target mapping.
>>> + *
>>> + * Return:
>>> + *  * 0 on success,
>>> + *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
>>> + *    address,
>>> + *  * Any error encountered while performing internal operations required to
>>> + *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
>>> + *    pvr_vm_gpuva_remap).
>>> + */
>>> +int
>>> +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
>>> +{
>>> +	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
>>> +	int err;
>>> +
>>> +	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
>>> +		return -EINVAL;
>>> +
>>> +	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
>>> +	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
>>> +	if (!op_ctx.prev_va || !op_ctx.next_va) {
>>> +		err = -ENOMEM;
>>> +		goto out;
>>> +	}
>>> +
>>> +	op_ctx.mmu_op_ctx =
>>> +		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
>>> +	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
>>> +	if (err) {
>>> +		op_ctx.mmu_op_ctx = NULL;
>>> +		goto out;
>>> +	}
>>> +
>>> +	mutex_lock(&vm_ctx->lock);
>>> +	err = drm_gpuva_sm_unmap(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size);
>>> +	mutex_unlock(&vm_ctx->lock);
>>> +
>>> +out:
>>> +	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
>>> +	kfree(op_ctx.next_va);
>>> +	kfree(op_ctx.prev_va);
>>> +
>>> +	return err;
>>> +}
>>> +
>>> +/*
>>> + * Static data areas are determined by firmware.
>>> + *
>>> + * When adding a new static data area you will also need to update the reserved_size field for the
>>> + * heap in pvr_heaps[].
>>> + */
>>> +static const struct drm_pvr_static_data_area static_data_areas[] = {
>>> +	{
>>> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
>>> +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
>>> +		.offset = 0,
>>> +		.size = 128,
>>> +	},
>>> +	{
>>> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
>>> +		.location_heap_id = DRM_PVR_HEAP_GENERAL,
>>> +		.offset = 128,
>>> +		.size = 1024,
>>> +	},
>>> +	{
>>> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
>>> +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
>>> +		.offset = 0,
>>> +		.size = 128,
>>> +	},
>>> +	{
>>> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
>>> +		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
>>> +		.offset = 128,
>>> +		.size = 128,
>>> +	},
>>> +	{
>>> +		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
>>> +		.location_heap_id = DRM_PVR_HEAP_USC_CODE,
>>> +		.offset = 0,
>>> +		.size = 128,
>>> +	},
>>> +};
>>> +
>>> +#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
>>> +
>>> +/*
>>> + * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
>>> + * static data area for each heap.
>>> + */
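(Worked example: the last static data area in the general heap above is
YUV_CSC at offset 128 with size 1024, so its reserved size would be
GET_RESERVED_SIZE(128, 1024) == round_up(1152, PAGE_SIZE), i.e. 4096 bytes
with 4K CPU pages.)
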
>>> +static const struct drm_pvr_heap pvr_heaps[] = {
>>> +	[DRM_PVR_HEAP_GENERAL] = {
>>> +		.base = ROGUE_GENERAL_HEAP_BASE,
>>> +		.size = ROGUE_GENERAL_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +	[DRM_PVR_HEAP_PDS_CODE_DATA] = {
>>> +		.base = ROGUE_PDSCODEDATA_HEAP_BASE,
>>> +		.size = ROGUE_PDSCODEDATA_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +	[DRM_PVR_HEAP_USC_CODE] = {
>>> +		.base = ROGUE_USCCODE_HEAP_BASE,
>>> +		.size = ROGUE_USCCODE_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +	[DRM_PVR_HEAP_RGNHDR] = {
>>> +		.base = ROGUE_RGNHDR_HEAP_BASE,
>>> +		.size = ROGUE_RGNHDR_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +	[DRM_PVR_HEAP_VIS_TEST] = {
>>> +		.base = ROGUE_VISTEST_HEAP_BASE,
>>> +		.size = ROGUE_VISTEST_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +	[DRM_PVR_HEAP_TRANSFER_FRAG] = {
>>> +		.base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
>>> +		.size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
>>> +		.flags = 0,
>>> +		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
>>> +	},
>>> +};
>>> +
>>> +int
>>> +pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
>>> +			  struct drm_pvr_ioctl_dev_query_args *args)
>>> +{
>>> +	struct drm_pvr_dev_query_static_data_areas query = {0};
>>> +	int err;
>>> +
>>> +	if (!args->pointer) {
>>> +		args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
>>> +		return 0;
>>> +	}
>>> +
>>> +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>> +	if (!query.static_data_areas.array) {
>>> +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
>>> +		query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
>>> +		goto copy_out;
>>> +	}
>>> +
>>> +	if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
>>> +		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
>>> +
>>> +	err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>> +copy_out:
>>> +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>> +	args->size = sizeof(query);
>>> +	return 0;
>>> +}
>>> +
>>> +int
>>> +pvr_heap_info_get(const struct pvr_device *pvr_dev,
>>> +		  struct drm_pvr_ioctl_dev_query_args *args)
>>> +{
>>> +	struct drm_pvr_dev_query_heap_info query = {0};
>>> +	u64 dest;
>>> +	int err;
>>> +
>>> +	if (!args->pointer) {
>>> +		args->size = sizeof(struct drm_pvr_dev_query_heap_info);
>>> +		return 0;
>>> +	}
>>> +
>>> +	err = PVR_UOBJ_GET(query, args->size, args->pointer);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>> +	if (!query.heaps.array) {
>>> +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
>>> +		query.heaps.stride = sizeof(struct drm_pvr_heap);
>>> +		goto copy_out;
>>> +	}
>>> +
>>> +	if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
>>> +		query.heaps.count = ARRAY_SIZE(pvr_heaps);
>>> +
>>> +	/* Region header heap is only present if BRN63142 is present. */
>>> +	dest = query.heaps.array;
>>> +	for (size_t i = 0; i < query.heaps.count; i++) {
>>> +		struct drm_pvr_heap heap = pvr_heaps[i];
>>> +
>>> +		if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
>>> +			heap.size = 0;
>>> +
>>> +		err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
>>> +		if (err < 0)
>>> +			return err;
>>> +
>>> +		dest += query.heaps.stride;
>>> +	}
>>> +
>>> +copy_out:
>>> +	err = PVR_UOBJ_SET(args->pointer, args->size, query);
>>> +	if (err < 0)
>>> +		return err;
>>> +
>>> +	args->size = sizeof(query);
>>> +	return 0;
>>> +}
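For readers unfamiliar with this query pattern: userspace is expected to call
the ioctl twice, once to discover the element count and stride and once to
fetch the array itself. A hedged userspace sketch follows; the request name
DRM_IOCTL_PVR_DEV_QUERY and the "type" member of the args struct are assumed
from the UAPI header rather than shown in this patch, and error handling is
omitted:

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <drm/pvr_drm.h>

    static void query_heaps(int drm_fd)
    {
            struct drm_pvr_dev_query_heap_info heap_info = {0};
            struct drm_pvr_ioctl_dev_query_args args = {
                    .type = DRM_PVR_DEV_QUERY_HEAP_INFO_GET, /* assumed member name */
                    .size = sizeof(heap_info),
                    .pointer = (uintptr_t)&heap_info,
            };

            /* Pass 1: heaps.array is 0, so only count and stride are filled in. */
            ioctl(drm_fd, DRM_IOCTL_PVR_DEV_QUERY, &args);

            /* Pass 2: supply a buffer sized from count and stride, then refetch. */
            heap_info.heaps.array =
                    (uintptr_t)calloc(heap_info.heaps.count, heap_info.heaps.stride);
            ioctl(drm_fd, DRM_IOCTL_PVR_DEV_QUERY, &args);

            free((void *)(uintptr_t)heap_info.heaps.array);
    }
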
>>> +
>>> +/**
>>> + * pvr_heap_contains_range() - Determine if a given heap contains the specified
>>> + *                             device-virtual address range.
>>> + * @pvr_heap: Target heap.
>>> + * @start: Inclusive start of the target range.
>>> + * @end: Inclusive end of the target range.
>>> + *
>>> + * It is an error to call this function with values of @start and @end that do
>>> + * not satisfy the condition @start <= @end.
>>> + */
>>> +static __always_inline bool
>>> +pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
>>> +{
>>> +	return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
>>> +}
>>> +
>>> +/**
>>> + * pvr_find_heap_containing() - Find a heap which contains the specified
>>> + *                              device-virtual address range.
>>> + * @pvr_dev: Target PowerVR device.
>>> + * @start: Start of the target range.
>>> + * @size: Size of the target range.
>>> + *
>>> + * Return:
>>> + *  * A pointer to a constant instance of struct drm_pvr_heap representing the
>>> + *    heap containing the entire range specified by @start and @size on
>>> + *    success, or
>>> + *  * %NULL if no such heap exists.
>>> + */
>>> +const struct drm_pvr_heap *
>>> +pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
>>> +{
>>> +	u64 end;
>>> +
>>> +	if (check_add_overflow(start, size - 1, &end))
>>> +		return NULL;
>>> +
>>> +	/*
>>> +	 * There are no guarantees about the order of address ranges in
>>> +	 * &pvr_heaps, so iterate over the entire array for a heap whose
>>> +	 * range completely encompasses the given range.
>>> +	 */
>>> +	for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
>>> +		/* Filter heaps that present only with an associated quirk */
>>> +		if (heap_id == DRM_PVR_HEAP_RGNHDR &&
>>> +		    !PVR_HAS_QUIRK(pvr_dev, 63142)) {
>>> +			continue;
>>> +		}
>>> +
>>> +		if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
>>> +			return &pvr_heaps[heap_id];
>>> +	}
>>> +
>>> +	return NULL;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_find_gem_object() - Look up a buffer object from a given
>>> + *                            device-virtual address.
>>> + * @vm_ctx: [IN] Target VM context.
>>> + * @device_addr: [IN] Virtual device address at the start of the required
>>> + *               object.
>>> + * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
>>> + *                     of the mapped region within the buffer object. May be
>>> + *                     %NULL if this information is not required.
>>> + * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
>>> + *                   region. May be %NULL if this information is not required.
>>> + *
>>> + * If successful, a reference will be taken on the buffer object. The caller
>>> + * must drop the reference with pvr_gem_object_put().
>>> + *
>>> + * Return:
>>> + *  * The PowerVR buffer object mapped at @device_addr if one exists, or
>>> + *  * %NULL otherwise.
>>> + */
>>> +struct pvr_gem_object *
>>> +pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
>>> +		       u64 *mapped_offset_out, u64 *mapped_size_out)
>>> +{
>>> +	struct pvr_gem_object *pvr_obj;
>>> +	struct drm_gpuva *va;
>>> +
>>> +	mutex_lock(&vm_ctx->lock);
>>> +
>>> +	va = drm_gpuva_find_first(&vm_ctx->gpuva_mgr, device_addr, 1);
>>> +	if (!va)
>>> +		goto err_unlock;
>>> +
>>> +	pvr_obj = gem_to_pvr_gem(va->gem.obj);
>>> +	pvr_gem_object_get(pvr_obj);
>>> +
>>> +	if (mapped_offset_out)
>>> +		*mapped_offset_out = va->gem.offset;
>>> +	if (mapped_size_out)
>>> +		*mapped_size_out = va->va.range;
>>> +
>>> +	mutex_unlock(&vm_ctx->lock);
>>> +
>>> +	return pvr_obj;
>>> +
>>> +err_unlock:
>>> +	mutex_unlock(&vm_ctx->lock);
>>> +
>>> +	return NULL;
>>> +}
>>> +
>>> +/**
>>> + * pvr_vm_get_fw_mem_context() - Get object representing firmware memory context
>>> + * @vm_ctx: Target VM context.
>>> + *
>>> + * Returns:
>>> + *  * FW object representing firmware memory context, or
>>> + *  * %NULL if this VM context does not have a firmware memory context.
>>> + */
>>> +struct pvr_fw_object *
>>> +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
>>> +{
>>> +	return vm_ctx->fw_mem_ctx_obj;
>>> +}
>>> diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
>>> new file mode 100644
>>> index 000000000000..b98bc3981807
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/imagination/pvr_vm.h
>>> @@ -0,0 +1,60 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
>>> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
>>> +
>>> +#ifndef PVR_VM_H
>>> +#define PVR_VM_H
>>> +
>>> +#include "pvr_rogue_mmu_defs.h"
>>> +
>>> +#include <uapi/drm/pvr_drm.h>
>>> +
>>> +#include <linux/types.h>
>>> +
>>> +/* Forward declaration from "pvr_device.h" */
>>> +struct pvr_device;
>>> +struct pvr_file;
>>> +
>>> +/* Forward declaration from "pvr_gem.h" */
>>> +struct pvr_gem_object;
>>> +
>>> +/* Forward declaration from "pvr_vm.c" */
>>> +struct pvr_vm_context;
>>> +
>>> +/* Forward declaration from <uapi/drm/pvr_drm.h> */
>>> +struct drm_pvr_ioctl_dev_query_args;
>>> +
>>> +/* Functions defined in pvr_vm.c */
>>> +
>>> +bool pvr_device_addr_is_valid(u64 device_addr);
>>> +bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
>>> +
>>> +struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
>>> +					     bool is_userspace_context);
>>> +
>>> +int pvr_vm_map(struct pvr_vm_context *vm_ctx,
>>> +	       struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
>>> +	       u64 device_addr, u64 size);
>>> +int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
>>> +
>>> +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
>>> +
>>> +int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
>>> +			      struct drm_pvr_ioctl_dev_query_args *args);
>>> +int pvr_heap_info_get(const struct pvr_device *pvr_dev,
>>> +		      struct drm_pvr_ioctl_dev_query_args *args);
>>> +const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
>>> +						    u64 addr, u64 size);
>>> +
>>> +struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
>>> +					      u64 device_addr,
>>> +					      u64 *mapped_offset_out,
>>> +					      u64 *mapped_size_out);
>>> +
>>> +struct pvr_fw_object *
>>> +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
>>> +
>>> +struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
>>> +bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
>>> +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
>>> +
>>> +#endif /* PVR_VM_H */
>>> -- 
>>> 2.41.0
>>>

Patch

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 9e144ff2742b..8fcabc1bea36 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -7,6 +7,9 @@  powervr-y := \
 	pvr_device.o \
 	pvr_device_info.o \
 	pvr_drv.o \
-	pvr_fw.o
+	pvr_fw.o \
+	pvr_gem.o \
+	pvr_mmu.o \
+	pvr_vm.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index b1fae182c4f6..ef8f7a2ff1a9 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -6,6 +6,7 @@ 
 
 #include "pvr_fw.h"
 #include "pvr_rogue_cr_defs.h"
+#include "pvr_vm.h"
 
 #include <drm/drm_print.h>
 
@@ -312,7 +313,26 @@  pvr_device_gpu_init(struct pvr_device *pvr_dev)
 	else
 		return -EINVAL;
 
-	return pvr_set_dma_info(pvr_dev);
+	err = pvr_set_dma_info(pvr_dev);
+	if (err)
+		return err;
+
+	pvr_dev->kernel_vm_ctx = pvr_vm_create_context(pvr_dev, false);
+	if (IS_ERR(pvr_dev->kernel_vm_ctx))
+		return PTR_ERR(pvr_dev->kernel_vm_ctx);
+
+	return 0;
+}
+
+/**
+ * pvr_device_gpu_fini() - GPU-specific deinitialization for a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ */
+static void
+pvr_device_gpu_fini(struct pvr_device *pvr_dev)
+{
+	WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx));
+	pvr_dev->kernel_vm_ctx = NULL;
 }
 
 /**
@@ -364,6 +384,7 @@  pvr_device_fini(struct pvr_device *pvr_dev)
 	 * Deinitialization stages are performed in reverse order compared to
 	 * the initialization stages in pvr_device_init().
 	 */
+	pvr_device_gpu_fini(pvr_dev);
 }
 
 bool
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 65ece87e2405..990363e433d7 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -123,6 +123,16 @@  struct pvr_device {
 	 */
 	struct clk *mem_clk;
 
+	/**
+	 * @kernel_vm_ctx: Virtual memory context used for kernel mappings.
+	 *
+	 * This is used for mappings in the firmware address region when a META firmware processor
+	 * is in use.
+	 *
+	 * When a MIPS firmware processor is in use, this will be %NULL.
+	 */
+	struct pvr_vm_context *kernel_vm_ctx;
+
 	/** @fw_dev: Firmware related data. */
 	struct pvr_fw_device fw_dev;
 };
@@ -145,6 +155,14 @@  struct pvr_file {
 	 *           to_pvr_device().
 	 */
 	struct pvr_device *pvr_dev;
+
+	/**
+	 * @vm_ctx_handles: Array of VM contexts belonging to this file. Array
+	 * members are of type "struct pvr_vm_context *".
+	 *
+	 * This array is used to allocate handles returned to userspace.
+	 */
+	struct xarray vm_ctx_handles;
 };
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index eb3018f94f7c..0d51177b506c 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -3,9 +3,11 @@ 
 
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_gem.h"
 #include "pvr_rogue_defs.h"
 #include "pvr_rogue_fwif_client.h"
 #include "pvr_rogue_fwif_shared.h"
+#include "pvr_vm.h"
 
 #include <uapi/drm/pvr_drm.h>
 
@@ -61,7 +63,86 @@  static int
 pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args,
 		    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_bo_args *args = raw_args;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct pvr_file *pvr_file = to_pvr_file(file);
+
+	struct pvr_gem_object *pvr_obj;
+	size_t sanitized_size;
+	size_t real_size;
+
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* All padding fields must be zeroed. */
+	if (args->_padding_c != 0) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * On 64-bit platforms (our primary target), size_t is a u64. However,
+	 * on other architectures we have to check for overflow when casting
+	 * down to size_t from u64.
+	 *
+	 * We also disallow zero-sized allocations, and reserved (kernel-only)
+	 * flags.
+	 */
+	if (args->size > SIZE_MAX || args->size == 0 ||
+	    args->flags & PVR_BO_RESERVED_MASK) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	sanitized_size = (size_t)args->size;
+
+	/*
+	 * Create a buffer object and transfer ownership to a userspace-
+	 * accessible handle.
+	 */
+	pvr_obj = pvr_gem_object_create(pvr_dev, sanitized_size, args->flags);
+	if (IS_ERR(pvr_obj)) {
+		err = PTR_ERR(pvr_obj);
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * Store the actual size of the created buffer object. We can't fetch
+	 * this after this point because we will no longer have a reference to
+	 * &pvr_obj.
+	 */
+	real_size = pvr_gem_object_size(pvr_obj);
+
+	/* This function will not modify &args->handle unless it succeeds. */
+	err = pvr_gem_object_into_handle(pvr_obj, pvr_file, &args->handle);
+	if (err)
+		goto err_destroy_obj;
+
+	/*
+	 * Now write the real size back to the args struct, after no further
+	 * errors can occur.
+	 */
+	args->size = real_size;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_destroy_obj:
+	/*
+	 * GEM objects are refcounted, so there is no explicit destructor
+	 * function. Instead, we release the singular reference we currently
+	 * hold on the object and let GEM take care of the rest.
+	 */
+	pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -88,7 +169,61 @@  static int
 pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args,
 			     struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_get_bo_mmap_offset_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_gem_object *pvr_obj;
+	struct drm_gem_object *gem_obj;
+	int idx;
+	int ret;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* All padding fields must be zeroed. */
+	if (args->_padding_4 != 0) {
+		ret = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * Obtain a kernel reference to the buffer object. This reference is
+	 * counted and must be manually dropped before returning. If a buffer
+	 * object cannot be found for the specified handle, return -%ENOENT (No
+	 * such file or directory).
+	 */
+	pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+	if (!pvr_obj) {
+		ret = -ENOENT;
+		goto err_drm_dev_exit;
+	}
+
+	gem_obj = gem_from_pvr_gem(pvr_obj);
+
+	/*
+	 * Allocate a fake offset which can be used in userspace calls to mmap
+	 * on the DRM device file. If this fails, return the error code. This
+	 * operation is idempotent.
+	 */
+	ret = drm_gem_create_mmap_offset(gem_obj);
+	if (ret != 0) {
+		/* Drop our reference to the buffer object. */
+		drm_gem_object_put(gem_obj);
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * Read out the fake offset allocated by the earlier call to
+	 * drm_gem_create_mmap_offset.
+	 */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	/* Drop our reference to the buffer object. */
+	pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return ret;
 }
 
 static __always_inline u64
@@ -517,10 +652,12 @@  pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args,
 		break;
 
 	case DRM_PVR_DEV_QUERY_HEAP_INFO_GET:
-		return -EINVAL;
+		ret = pvr_heap_info_get(pvr_dev, args);
+		break;
 
 	case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET:
-		return -EINVAL;
+		ret = pvr_static_data_areas_get(pvr_dev, args);
+		break;
 	}
 
 	drm_dev_exit(idx);
@@ -667,7 +804,46 @@  static int
 pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args,
 			    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_vm_context_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	if (args->_padding_4) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	vm_ctx = pvr_vm_create_context(pvr_file->pvr_dev, true);
+	if (IS_ERR(vm_ctx)) {
+		err = PTR_ERR(vm_ctx);
+		goto err_drm_dev_exit;
+	}
+
+	/* Allocate object handle for userspace. */
+	err = xa_alloc(&pvr_file->vm_ctx_handles,
+		       &args->handle,
+		       vm_ctx,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err < 0)
+		goto err_cleanup;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_cleanup:
+	pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -687,7 +863,19 @@  static int
 pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args,
 			     struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_destroy_vm_context_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+
+	if (args->_padding_4)
+		return -EINVAL;
+
+	vm_ctx = xa_erase(&pvr_file->vm_ctx_handles, args->handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	pvr_vm_context_put(vm_ctx);
+	return 0;
 }
 
 /**
@@ -717,7 +905,79 @@  static int
 pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args,
 		 struct drm_file *file)
 {
-	return -ENOTTY;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct drm_pvr_ioctl_vm_map_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+
+	struct pvr_gem_object *pvr_obj;
+	size_t pvr_obj_size;
+
+	u64 offset_plus_size;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* Initial validation of args. */
+	if (args->_padding_14) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	if (args->flags != 0 ||
+	    check_add_overflow(args->offset, args->size, &offset_plus_size) ||
+	    !pvr_find_heap_containing(pvr_dev, args->device_addr, args->size)) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+	if (!pvr_obj) {
+		err = -ENOENT;
+		goto err_put_vm_context;
+	}
+
+	pvr_obj_size = pvr_gem_object_size(pvr_obj);
+
+	/*
+	 * Validate offset and size args. The alignment of these will be
+	 * checked when mapping; for now just check that they're within valid
+	 * bounds
+	 */
+	if (args->offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) {
+		err = -EINVAL;
+		goto err_put_pvr_object;
+	}
+
+	err = pvr_vm_map(vm_ctx, pvr_obj, args->offset,
+			 args->device_addr, args->size);
+	if (err)
+		goto err_put_pvr_object;
+
+	/*
+	 * In order to set up the mapping, we needed a reference to &pvr_obj.
+	 * However, pvr_vm_map() obtains and stores its own reference, so we
+	 * must release ours before returning.
+	 */
+
+err_put_pvr_object:
+	pvr_gem_object_put(pvr_obj);
+
+err_put_vm_context:
+	pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -740,7 +1000,24 @@  static int
 pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args,
 		   struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_vm_unmap_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+	int err;
+
+	/* Initial validation of args. */
+	if (args->_padding_4)
+		return -EINVAL;
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	err = pvr_vm_unmap(vm_ctx, args->device_addr, args->size);
+
+	pvr_vm_context_put(vm_ctx);
+
+	return err;
 }
 
 /*
@@ -931,6 +1208,8 @@  pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
 	 */
 	pvr_file->pvr_dev = pvr_dev;
 
+	xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
+
 	/*
 	 * Store reference to powervr-specific file private data in DRM file
 	 * private data.
@@ -956,6 +1235,9 @@  pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 {
 	struct pvr_file *pvr_file = to_pvr_file(file);
 
+	/* Drop references on any remaining objects. */
+	pvr_destroy_vm_contexts_for_file(pvr_file);
+
 	kfree(pvr_file);
 	file->driver_priv = NULL;
 }
@@ -963,7 +1245,7 @@  pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
 
 static struct drm_driver pvr_drm_driver = {
-	.driver_features = DRIVER_RENDER,
+	.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER,
 	.open = pvr_drm_driver_open,
 	.postclose = pvr_drm_driver_postclose,
 	.ioctls = pvr_drm_driver_ioctls,
@@ -977,6 +1259,8 @@  static struct drm_driver pvr_drm_driver = {
 	.minor = PVR_DRIVER_MINOR,
 	.patchlevel = PVR_DRIVER_PATCHLEVEL,
 
+	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+	.gem_create_object = pvr_gem_create_object,
 };
 
 static int
diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c
new file mode 100644
index 000000000000..8a07bb4c38ac
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_gem.c
@@ -0,0 +1,396 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_gem.h"
+#include "pvr_vm.h"
+
+#include <drm/drm_gem.h>
+#include <drm/drm_prime.h>
+
+#include <linux/compiler.h>
+#include <linux/compiler_attributes.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/iosys-map.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+
+static int pvr_gem_mmap(struct drm_gem_object *gem_obj, struct vm_area_struct *vma)
+{
+	struct pvr_gem_object *pvr_obj = gem_to_pvr_gem(gem_obj);
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+
+	if (!(pvr_obj->flags & DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS))
+		return -EINVAL;
+
+	return drm_gem_shmem_mmap(shmem_obj, vma);
+}
+
+static const struct drm_gem_object_funcs pvr_gem_object_funcs = {
+	.free = drm_gem_shmem_object_free,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = pvr_gem_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * pvr_gem_object_flags_validate() - Verify that a collection of PowerVR GEM
+ * mapping and/or creation flags form a valid combination.
+ * @flags: PowerVR GEM mapping/creation flags to validate.
+ *
+ * This function explicitly allows kernel-only flags. All ioctl entrypoints
+ * should do their own validation as well as relying on this function.
+ *
+ * Return:
+ *  * %true if @flags contains valid mapping and/or creation flags, or
+ *  * %false otherwise.
+ */
+static bool
+pvr_gem_object_flags_validate(u64 flags)
+{
+	static const u64 invalid_combinations[] = {
+		/*
+		 * Memory flagged as PM/FW-protected cannot be mapped to
+		 * userspace. To make this explicit, we require that the two
+		 * flags allowing each of these respective features are never
+		 * specified together.
+		 */
+		(DRM_PVR_BO_DEVICE_PM_FW_PROTECT |
+		 DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS),
+	};
+
+	int i;
+
+	/*
+	 * Check for bits set in undefined regions. Reserved regions refer to
+	 * options that can only be set by the kernel. These are explicitly
+	 * allowed in most cases, and must be checked specifically in IOCTL
+	 * callback code.
+	 */
+	if ((flags & PVR_BO_UNDEFINED_MASK) != 0)
+		return false;
+
+	/*
+	 * Check for all combinations of flags marked as invalid in the array
+	 * above.
+	 */
+	for (i = 0; i < ARRAY_SIZE(invalid_combinations); ++i) {
+		u64 combo = invalid_combinations[i];
+
+		if ((flags & combo) == combo)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * pvr_gem_object_into_handle() - Convert a reference to an object into a
+ * userspace-accessible handle.
+ * @pvr_obj: [IN] Target PowerVR-specific object.
+ * @pvr_file: [IN] File to associate the handle with.
+ * @handle: [OUT] Pointer to store the created handle in. Remains unmodified if
+ * an error is encountered.
+ *
+ * If an error is encountered, ownership of @pvr_obj will not have been
+ * transferred. If this function succeeds, however, further use of @pvr_obj is
+ * considered undefined behaviour unless another reference to it is explicitly
+ * held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to allocate a handle on @pvr_file.
+ */
+int
+pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+			   struct pvr_file *pvr_file, u32 *handle)
+{
+	struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+	struct drm_file *file = from_pvr_file(pvr_file);
+
+	u32 new_handle;
+	int err;
+
+	err = drm_gem_handle_create(file, gem_obj, &new_handle);
+	if (err)
+		return err;
+
+	/*
+	 * Release our reference to @pvr_obj, effectively transferring
+	 * ownership to the handle.
+	 */
+	pvr_gem_object_put(pvr_obj);
+
+	/*
+	 * Do not store the new handle in @handle until no more errors can
+	 * occur.
+	 */
+	*handle = new_handle;
+
+	return 0;
+}
+
+/**
+ * pvr_gem_object_from_handle() - Obtain a reference to an object from a
+ * userspace handle.
+ * @pvr_file: PowerVR-specific file to which @handle is associated.
+ * @handle: Userspace handle referencing the target object.
+ *
+ * On return, @handle always maintains its reference to the requested object
+ * (if it had one in the first place). If this function succeeds, the returned
+ * object will hold an additional reference. When the caller is finished with
+ * the returned object, they should call pvr_gem_object_put() on it to release
+ * this reference.
+ *
+ * Return:
+ *  * A pointer to the requested PowerVR-specific object on success, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_gem_object_from_handle(struct pvr_file *pvr_file, u32 handle)
+{
+	struct drm_file *file = from_pvr_file(pvr_file);
+	struct drm_gem_object *gem_obj;
+
+	gem_obj = drm_gem_object_lookup(file, handle);
+	if (!gem_obj)
+		return NULL;
+
+	return gem_to_pvr_gem(gem_obj);
+}
+
+/**
+ * pvr_gem_object_vmap() - Map a PowerVR GEM object into CPU virtual address
+ * space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Once the caller is finished with the CPU mapping, they must call
+ * pvr_gem_object_vunmap() on @pvr_obj.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_cpu() is called to make
+ * sure the CPU mapping is consistent.
+ *
+ * Return:
+ *  * A pointer to the CPU mapping on success,
+ *  * -%ENOMEM if the mapping fails, or
+ *  * Any error encountered while attempting to acquire a reference to the
+ *    backing pages for @pvr_obj.
+ */
+void *
+pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+	struct iosys_map map;
+	int err;
+
+	dma_resv_lock(obj->resv, NULL);
+
+	err = drm_gem_shmem_vmap(shmem_obj, &map);
+	if (err)
+		goto err_unlock;
+
+	if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+		struct device *dev = shmem_obj->base.dev->dev;
+
+		/* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+		 * in GPU space yet.
+		 */
+		if (shmem_obj->sgt)
+			dma_sync_sgtable_for_cpu(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+	}
+
+	dma_resv_unlock(obj->resv);
+
+	return map.vaddr;
+
+err_unlock:
+	dma_resv_unlock(obj->resv);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_gem_object_vunmap() - Unmap a PowerVR memory object from CPU virtual
+ * address space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_device() is called to make
+ * sure the GPU mapping is consistent.
+ */
+void
+pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(shmem_obj->vaddr);
+	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+
+	if (WARN_ON(!map.vaddr))
+		return;
+
+	dma_resv_lock(obj->resv, NULL);
+
+	if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+		struct device *dev = shmem_obj->base.dev->dev;
+
+		/* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+		 * in GPU space yet.
+		 */
+		if (shmem_obj->sgt)
+			dma_sync_sgtable_for_device(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+	}
+
+	drm_gem_shmem_vunmap(shmem_obj, &map);
+
+	dma_resv_unlock(obj->resv);
+}
+
+/**
+ * pvr_gem_object_zero() - Zeroes the physical memory behind an object.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to map @pvr_obj to the CPU (see
+ *    pvr_gem_object_vmap()).
+ */
+static int
+pvr_gem_object_zero(struct pvr_gem_object *pvr_obj)
+{
+	void *cpu_ptr;
+
+	cpu_ptr = pvr_gem_object_vmap(pvr_obj);
+	if (IS_ERR(cpu_ptr))
+		return PTR_ERR(cpu_ptr);
+
+	memset(cpu_ptr, 0, pvr_gem_object_size(pvr_obj));
+
+	/* Make sure the zeroing is done before vunmapping the object. */
+	wmb();
+
+	pvr_gem_object_vunmap(pvr_obj);
+
+	return 0;
+}
+
+/**
+ * pvr_gem_create_object() - Allocates and pre-initializes a pvr_gem_object
+ * @drm_dev: DRM device creating this object.
+ * @size: Size of the object to allocate in bytes.
+ *
+ * Return:
+ *  * The new pre-initialized GEM object on success,
+ *  * -ENOMEM if the allocation failed.
+ */
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size)
+{
+	struct drm_gem_object *gem_obj;
+	struct pvr_gem_object *pvr_obj;
+
+	pvr_obj = kzalloc(sizeof(*pvr_obj), GFP_KERNEL);
+	if (!pvr_obj)
+		return ERR_PTR(-ENOMEM);
+
+	gem_obj = gem_from_pvr_gem(pvr_obj);
+	gem_obj->funcs = &pvr_gem_object_funcs;
+	return gem_obj;
+}
+
+/**
+ * pvr_gem_object_create() - Creates a PowerVR-specific buffer object.
+ * @pvr_dev: Target PowerVR device.
+ * @size: Size of the object to allocate in bytes. Must be greater than zero.
+ * Any value which is not an exact multiple of the system page size will be
+ * rounded up to satisfy this condition.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ *
+ * The created object may be larger than @size, but can never be smaller. To
+ * get the exact size, call pvr_gem_object_size() on the returned pointer.
+ *
+ * Return:
+ *  * The newly-minted PowerVR-specific buffer object on success,
+ *  * -%EINVAL if @size is zero or @flags is not valid,
+ *  * -%ENOMEM if sufficient physical memory cannot be allocated, or
+ *  * Any other error returned by drm_gem_shmem_create().
+ */
+struct pvr_gem_object *
+pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
+{
+	struct drm_gem_shmem_object *shmem_obj;
+	struct pvr_gem_object *pvr_obj;
+
+	/* Verify @size and @flags before continuing. */
+	if (size == 0 || !pvr_gem_object_flags_validate(flags))
+		return ERR_PTR(-EINVAL);
+
+	shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
+	if (IS_ERR(shmem_obj))
+		return ERR_CAST(shmem_obj);
+
+	shmem_obj->pages_mark_dirty_on_put = true;
+	shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED);
+	pvr_obj = shmem_gem_to_pvr_gem(shmem_obj);
+	pvr_obj->flags = flags;
+
+	/*
+	 * Do this last because pvr_gem_object_zero() requires a fully
+	 * configured instance of struct pvr_gem_object.
+	 */
+	pvr_gem_object_zero(pvr_obj);
+
+	return pvr_obj;
+}
+
+/**
+ * pvr_gem_get_dma_addr() - Get DMA address for given offset in object
+ * @pvr_obj: Pointer to object to lookup address in.
+ * @offset: Offset within object to lookup address at.
+ * @dma_addr_out: Pointer to location to store DMA address.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if object is not currently backed, or if @offset is out of valid
+ *    range for this object.
+ */
+int
+pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+		     dma_addr_t *dma_addr_out)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	struct sg_table *sgt;
+	u32 accumulated_offset = 0;
+	struct scatterlist *sgl;
+	unsigned int sgt_idx;
+
+	sgt = drm_gem_shmem_get_pages_sgt(shmem_obj);
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	for_each_sgtable_dma_sg(sgt, sgl, sgt_idx) {
+		u32 new_offset = accumulated_offset + sg_dma_len(sgl);
+
+		if (offset >= accumulated_offset && offset < new_offset) {
+			*dma_addr_out = sg_dma_address(sgl) +
+					(offset - accumulated_offset);
+			return 0;
+		}
+
+		accumulated_offset = new_offset;
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h
new file mode 100644
index 000000000000..5b82cd67d83c
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_gem.h
@@ -0,0 +1,177 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_GEM_H
+#define PVR_GEM_H
+
+#include "pvr_rogue_heap_config.h"
+#include "pvr_rogue_meta.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/const.h>
+#include <linux/compiler_attributes.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+/**
+ * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only)
+ *
+ * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options
+ * for this field are specified in the UAPI header "pvr_drm.h" with a
+ * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist
+ * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix.
+ * The public options should be used directly, DRM prefix and all.
+ *
+ * To avoid potentially confusing gaps in the UAPI options, these kernel-only
+ * options are specified "in reverse", starting at bit 63.
+ *
+ * We use "reserved" to refer to bits defined here and not exposed in the UAPI.
+ * Bits not defined anywhere are "undefined".
+ *
+ * Creation options
+ *    These use the prefix PVR_BO_CREATE_.
+ *
+ *    *There are currently no kernel-only flags in this group.*
+ *
+ * Device mapping options
+ *    These use the prefix PVR_BO_DEVICE_.
+ *
+ *    *There are currently no kernel-only flags in this group.*
+ *
+ * CPU mapping options
+ *    These use the prefix PVR_BO_CPU_.
+ *
+ *    :CACHED: By default, all GEM objects are mapped write-combined on the
+ *       CPU. Set this flag to override this behaviour and map the object
+ *       cached.
+ */
+#define PVR_BO_CPU_CACHED BIT_ULL(63)
+
+#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62)
+
+/* Bits 61..3 are undefined. */
+/* Bits 2..0 are defined in the UAPI. */
+
+/* Other utilities. */
+#define PVR_BO_UNDEFINED_MASK GENMASK_ULL(61, 3)
+#define PVR_BO_RESERVED_MASK (PVR_BO_UNDEFINED_MASK | GENMASK_ULL(63, 63))
+
+/*
+ * All firmware-mapped memory uses (mostly) the same flags. Specifically,
+ * firmware-mapped memory should be:
+ *  * Read/write on the device,
+ *  * Read/write on the CPU, and
+ *  * Write-combined on the CPU.
+ *
+ * The only variation is in caching on the device.
+ */
+#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0))
+#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_DEVICE_BYPASS_CACHE
+
+/**
+ * struct pvr_gem_object - powervr-specific wrapper for &struct drm_gem_object
+ */
+struct pvr_gem_object {
+	/**
+	 * @base: The underlying &struct drm_gem_shmem_object.
+	 *
+	 * Do not access this member directly, instead call
+	 * shmem_gem_from_pvr_gem().
+	 */
+	struct drm_gem_shmem_object base;
+
+	/**
+	 * @flags: Options set at creation-time. Some of these options apply to
+	 * the creation operation itself (which are stored here for reference)
+	 * with the remainder used for mapping options to both the device and
+	 * CPU. These are used every time this object is mapped, and are not
+	 * expected to change after creation.
+	 *
+	 * Must be a combination of DRM_PVR_BO_* and/or PVR_BO_* flags.
+	 *
+	 * .. note::
+	 *
+	 *    None of these options should change or be changed throughout the
+	 *    object's lifetime.
+	 */
+	u64 flags;
+};
+
+static_assert(offsetof(struct pvr_gem_object, base) == 0,
+	      "offsetof(struct pvr_gem_object, base) not zero");
+
+#define shmem_gem_from_pvr_gem(pvr_obj) (&pvr_obj->base)
+
+#define shmem_gem_to_pvr_gem(shmem_obj) container_of_const(shmem_obj, struct pvr_gem_object, base)
+
+#define gem_from_pvr_gem(pvr_obj) (&pvr_obj->base.base)
+
+#define gem_to_pvr_gem(gem_obj) container_of_const(gem_obj, struct pvr_gem_object, base.base)
+
+/* Functions defined in pvr_gem.c */
+
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size);
+
+struct pvr_gem_object *pvr_gem_object_create(struct pvr_device *pvr_dev,
+					     size_t size, u64 flags);
+
+int pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+			       struct pvr_file *pvr_file, u32 *handle);
+struct pvr_gem_object *pvr_gem_object_from_handle(struct pvr_file *pvr_file,
+						  u32 handle);
+
+static __always_inline struct sg_table *
+pvr_gem_object_get_pages_sgt(struct pvr_gem_object *pvr_obj)
+{
+	return drm_gem_shmem_get_pages_sgt(shmem_gem_from_pvr_gem(pvr_obj));
+}
+
+void *pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj);
+void pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj);
+
+int pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+			 dma_addr_t *dma_addr_out);
+
+/**
+ * pvr_gem_object_get() - Acquire reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to acquire reference on.
+ */
+static __always_inline void
+pvr_gem_object_get(struct pvr_gem_object *pvr_obj)
+{
+	drm_gem_object_get(gem_from_pvr_gem(pvr_obj));
+}
+
+/**
+ * pvr_gem_object_put() - Release reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to release reference on.
+ */
+static __always_inline void
+pvr_gem_object_put(struct pvr_gem_object *pvr_obj)
+{
+	drm_gem_object_put(gem_from_pvr_gem(pvr_obj));
+}
+
+static __always_inline size_t
+pvr_gem_object_size(struct pvr_gem_object *pvr_obj)
+{
+	return gem_from_pvr_gem(pvr_obj)->size;
+}
+
+#endif /* PVR_GEM_H */
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
new file mode 100644
index 000000000000..3b48e1f77d09
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -0,0 +1,2487 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_mmu.h"
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_mmu_defs.h"
+
+#include <drm/drm_drv.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/kmemleak.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+
+#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
+#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
+
+/*
+ * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
+ * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both
+ * ensures that the selected host page size corresponds to a valid device page
+ * size and sets up values needed by the MMU code below.
+ */
+#if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
+#else
+# error Unsupported device page size PVR_DEVICE_PAGE_SIZE
+#endif
+
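+/*
+ * As the macro below encodes, the number of PTEs per page table scales down
+ * as the device page size scales up (their product stays constant). For
+ * example, with 16K device pages the shift difference is 14 - 12 = 2, i.e. a
+ * quarter of the 4K-page entry count.
+ */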
+#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
+	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
+	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
+
+/**
+ * pvr_mmu_flush() - Request flush of all MMU caches.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function must be called following any possible change to the MMU page
+ * tables.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error encountered while submitting the flush command via the KCCB.
+ */
+int
+pvr_mmu_flush(struct pvr_device *pvr_dev)
+{
+	/* TODO: implement */
+	return -ENODEV;
+}
+
+/**
+ * DOC: PowerVR Virtual Memory Handling
+ */
+/**
+ * DOC: PowerVR Virtual Memory Handling (constants)
+ *
+ * .. c:macro:: PVR_IDX_INVALID
+ *
+ *    Default value for a u16-based index.
+ *
+ *    This value cannot be zero, since zero is a valid index value.
+ */
+#define PVR_IDX_INVALID ((u16)(-1))
+
+/**
+ * DOC: MMU backing pages
+ */
+/**
+ * DOC: MMU backing pages (constants)
+ *
+ * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
+ *
+ *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
+ *    at least as large as this value for the current implementation; this is
+ *    checked at compile-time.
+ */
+#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
+static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_mmu_backing_page - Represents a single page used to back a page
+ *                              table of any level.
+ * @dma_addr: DMA address of this page.
+ * @host_ptr: CPU address of this page.
+ * @pvr_dev: The PowerVR device with which this page is associated. **For
+ *           internal use only.**
+ */
+struct pvr_mmu_backing_page {
+	dma_addr_t dma_addr;
+	void *host_ptr;
+/* private: internal use only */
+	struct page *raw_page;
+	struct pvr_device *pvr_dev;
+};
+
+/**
+ * pvr_mmu_backing_page_init() - Initialize an MMU backing page.
+ * @page: Target backing page.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function performs three distinct operations:
+ *
+ * 1. Allocate a single page,
+ * 2. Map the page to the CPU, and
+ * 3. Map the page to DMA-space.
+ *
+ * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%ENOMEM if allocation of the backing page or mapping of the backing
+ *    page to DMA fails.
+ */
+static int
+pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
+			  struct pvr_device *pvr_dev)
+{
+	struct device *dev = from_pvr_device(pvr_dev)->dev;
+
+	struct page *raw_page;
+	int err;
+
+	dma_addr_t dma_addr;
+	void *host_ptr;
+
+	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
+	if (!raw_page)
+		return -ENOMEM;
+
+	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	if (!host_ptr) {
+		err = -ENOMEM;
+		goto err_free_page;
+	}
+
+	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
+				DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		err = -ENOMEM;
+		goto err_unmap_page;
+	}
+
+	page->dma_addr = dma_addr;
+	page->host_ptr = host_ptr;
+	page->pvr_dev = pvr_dev;
+	page->raw_page = raw_page;
+	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
+
+	return 0;
+
+err_unmap_page:
+	vunmap(host_ptr);
+
+err_free_page:
+	__free_page(raw_page);
+
+	return err;
+}
+
+/**
+ * pvr_mmu_backing_page_fini() - Tear down an MMU backing page.
+ * @page: Target backing page.
+ *
+ * This function performs the mirror operations to pvr_mmu_backing_page_init(),
+ * in reverse order:
+ *
+ * 1. Unmap the page from DMA-space,
+ * 2. Unmap the page from the CPU, and
+ * 3. Free the page.
+ *
+ * It also zeros @page.
+ *
+ * It is a no-op to call this function a second (or further) time on any @page.
+ */
+static void
+pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
+{
+	struct device *dev = from_pvr_device(page->pvr_dev)->dev;
+
+	/* Do nothing if no allocation is present. */
+	if (!page->pvr_dev)
+		return;
+
+	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
+		       DMA_TO_DEVICE);
+
+	kmemleak_free(page->host_ptr);
+	vunmap(page->host_ptr);
+
+	__free_page(page->raw_page);
+
+	memset(page, 0, sizeof(*page));
+}
+
+/**
+ * pvr_mmu_backing_page_sync() - Flush an MMU backing page from the CPU to the
+ *                              device.
+ * @page: Target backing page.
+ *
+ * .. caution::
+ *
+ *    **This is potentially an expensive function call.** Only call
+ *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
+ *    make to the backing page in the immediate future.
+ */
+static void
+pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page)
+{
+	struct device *dev;
+
+	/*
+	 * Do nothing if no allocation is present. This may be the case if
+	 * we are unmapping pages.
+	 */
+	if (!page->pvr_dev)
+		return;
+
+	dev = from_pvr_device(page->pvr_dev)->dev;
+
+	dma_sync_single_for_device(dev, page->dma_addr,
+				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
+}
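+
+/*
+ * Typical backing page lifecycle (a minimal sketch; error handling omitted).
+ * CPU-side writes go through @host_ptr and are pushed to the device with
+ * pvr_mmu_backing_page_sync():
+ *
+ *   struct pvr_mmu_backing_page page = {};
+ *
+ *   pvr_mmu_backing_page_init(&page, pvr_dev);
+ *   memset(page.host_ptr, 0, PVR_MMU_BACKING_PAGE_SIZE);
+ *   pvr_mmu_backing_page_sync(&page);
+ *   pvr_mmu_backing_page_fini(&page);
+ */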
+
+/**
+ * DOC: Raw page tables
+ */
+
+#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
+	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)
+
+#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
+	(((entry_).val &                                           \
+	  ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
+	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)
+
+#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
+	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
+	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
+	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
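+
+/*
+ * Illustrative use of the helpers above (not part of the driver): packing
+ * and then unpacking the VALID bit of a level 2 ("PC") entry. The token
+ * pasting resolves to the ROGUE_MMUCTRL_PC_DATA_VALID_{SHIFT,CLRMSK}
+ * constants:
+ *
+ *   struct pvr_page_table_l2_entry_raw e = {
+ *           .val = PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true),
+ *   };
+ *   bool valid = PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, e);
+ */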
+
+/**
+ * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 31..4
+ *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
+ *        page table base address, which is 4KiB aligned.
+ *
+ *    * - 3..2
+ *      - *(reserved)*
+ *
+ *    * - 1
+ *      - **Pending:** When the valid bit is not set, indicates that a valid
+ *        entry is pending and the MMU should wait for the driver to map
+ *        the entry. This is used to support page demand mapping of
+ *        memory.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L1 page
+ *        table. If the valid bit is not set, then an attempted use of
+ *        the page would result in a page fault.
+ */
+struct pvr_page_table_l2_entry_raw {
+	u32 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
+
+static bool
+pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
+ *                                     page table.
+ * @entry: Target raw level 2 page table entry.
+ * @child_table_dma_addr: DMA address of the level 1 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
+ */
+static void
+pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
+				dma_addr_t child_table_dma_addr)
+{
+	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
+
+	entry->val =
+		PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
+		PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
+		PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr);
+}
+
+static void
+pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
+{
+	entry->val = 0;
+}
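+
+/*
+ * Worked example (illustrative only): for a child L1 table at DMA address
+ * 0x80001000, and assuming ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT == 12
+ * and ROGUE_MMUCTRL_PC_DATA_PD_BASE_SHIFT == 4 (the 4KiB alignment and the
+ * bits 31..4 field position described in the bitmap above),
+ * pvr_page_table_l2_entry_raw_set() stores 0x80001000 >> 12 = 0x80001 in the
+ * base address field and sets the valid bit, giving a raw value of
+ * (0x80001 << 4) | 0x1 == 0x800011.
+ */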
+
+/**
+ * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63..41
+ *      - *(reserved)*
+ *
+ *    * - 40
+ *      - **Pending:** When the valid bit is not set, indicates that a valid
+ *        entry is pending and the MMU should wait for the driver to map the
+ *        entry. This is used to support page demand mapping of memory.
+ *
+ *    * - 39..5
+ *      - **Level 0 Page Table Base Address:** The way this value is
+ *        interpreted depends on the page size. Bits not specified in the
+ *        table below (e.g. bits 11..5 for page size 4KiB) should be
+ *        considered reserved.
+ *
+ *        This table shows the bits used in an L1 page table entry to
+ *        represent the Physical Table Base Address for a given Page Size.
+ *        Since each L1 page table entry covers 2MiB of address space, the
+ *        maximum page size is 2MiB.
+ *
+ *        .. flat-table::
+ *           :widths: 1 1 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - L0 page table base address bits
+ *             - Number of L0 page table entries
+ *             - Size of L0 page table
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *             - 512
+ *             - 4KiB
+ *
+ *           * - 16KiB
+ *             - 39..10
+ *             - 128
+ *             - 1KiB
+ *
+ *           * - 64KiB
+ *             - 39..8
+ *             - 32
+ *             - 256B
+ *
+ *           * - 256KiB
+ *             - 39..6
+ *             - 8
+ *             - 64B
+ *
+ *           * - 1MiB
+ *             - 39..5 (4 = '0')
+ *             - 2
+ *             - 16B
+ *
+ *           * - 2MiB
+ *             - 39..5 (4..3 = '00')
+ *             - 1
+ *             - 8B
+ *
+ *    * - 4
+ *      - *(reserved)*
+ *
+ *    * - 3..1
+ *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L0 page table.
+ *        If the valid bit is not set, then an attempted use of the page would
+ *        result in a page fault.
+ */
+struct pvr_page_table_l1_entry_raw {
+	u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
+
+static bool
+pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
+ *                                     page table.
+ * @entry: Target raw level 1 page table entry.
+ * @child_table_dma_addr: DMA address of the level 0 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of 4 KiB.
+ */
+static void
+pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
+				dma_addr_t child_table_dma_addr)
+{
+	entry->val = PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
+		     PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
+		     PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE,
+					       ROGUE_MMUCTRL_PAGE_SIZE_X) |
+		     /*
+		      * The use of a 4K-specific macro here is correct.
+		      * Allocating sub-host-page-sized blocks for individual
+		      * tables is a future optimization; for now, every page
+		      * table address is aligned to the size of the largest
+		      * (4KB) table, so the 4K-specific mask always applies.
+		      */
+		     (child_table_dma_addr &
+		      ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK);
+}
+
+static void
+pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
+{
+	entry->val = 0;
+}
+
+/**
+ * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63
+ *      - *(reserved)*
+ *
+ *    * - 62
+ *      - **PM/FW Protect:** Indicates a protected region which only the
+ *        Parameter Manager (PM) or firmware processor can write to.
+ *
+ *    * - 61..40
+ *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
+ *        (PM) memory. This field is only used if the additional level of PB
+ *        virtualization is enabled. The VP Page field is needed by the PM in
+ *        order to correctly reconstitute the free lists after render
+ *        completion. This (High) field holds bits 39..18 of the value; the
+ *        Low field holds bits 17..12. Bits 11..0 are always zero because the
+ *        value is always aligned to the 4KiB page size.
+ *
+ *    * - 39..12
+ *      - **Physical Page Address:** The way this value is interpreted depends
+ *        on the page size. Bits not specified in the table below (e.g. bits
+ *        20..12 for page size 2MiB) should be considered reserved.
+ *
+ *        This table shows the bits used in an L0 page table entry to represent
+ *        the Physical Page Address for a given page size (as defined in the
+ *        associated L1 page table entry).
+ *
+ *        .. flat-table::
+ *           :widths: 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - Physical address bits
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *
+ *           * - 16KiB
+ *             - 39..14
+ *
+ *           * - 64KiB
+ *             - 39..16
+ *
+ *           * - 256KiB
+ *             - 39..18
+ *
+ *           * - 1MiB
+ *             - 39..20
+ *
+ *           * - 2MiB
+ *             - 39..21
+ *
+ *    * - 11..6
+ *      - **VP Page (Low):** Continuation of VP Page (High).
+ *
+ *    * - 5
+ *      - **Pending:** When the valid bit is not set, indicates that a valid
+ *        entry is pending and the MMU should wait for the driver to map the
+ *        entry. This is used to support page demand mapping of memory.
+ *
+ *    * - 4
+ *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
+ *        entries when indicated by the PM. Note that this bit will only be set
+ *        by the PM, not by the device driver.
+ *
+ *    * - 3
+ *      - **SLC Bypass Control:** Specifies requests to this page should bypass
+ *        the System Level Cache (SLC), if enabled in SLC configuration.
+ *
+ *    * - 2
+ *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
+ *        does not require a cache flush between operations on the CPU and the
+ *        device).
+ *
+ *    * - 1
+ *      - **Read Only:** If set, this bit indicates that the page is read only.
+ *        An attempted write to this page would result in a write-protection
+ *        fault.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid page. If the
+ *        valid bit is not set, then an attempted use of the page would result
+ *        in a page fault.
+ */
+struct pvr_page_table_l0_entry_raw {
+	u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
+
+/**
+ * struct pvr_page_flags_raw - The configurable flags from a single entry in a
+ *                             level 0 page table.
+ * @val: The raw value of these flags. Since these are a strict subset of
+ *       &struct pvr_page_table_l0_entry_raw, use that type for our member here.
+ *
+ * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
+ * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
+ *
+ * This type should never be instantiated directly; instead use
+ * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
+ */
+struct pvr_page_flags_raw {
+	struct pvr_page_table_l0_entry_raw val;
+} __packed;
+static_assert(sizeof(struct pvr_page_flags_raw) ==
+	      sizeof(struct pvr_page_table_l0_entry_raw));
+
+static bool
+pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
+ *                                     page table.
+ * @entry: Target raw level 0 page table entry.
+ * @dma_addr: DMA address of the physical page to be associated with @entry.
+ * @flags: Options to be set on @entry.
+ *
+ * When calling this function, @dma_addr must be a valid DMA address and a
+ * multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * The @flags parameter is directly assigned into @entry. It is the caller's
+ * responsibility to ensure that only bits specified in
+ * &struct pvr_page_flags_raw are set in @flags.
+ */
+static void
+pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
+				dma_addr_t dma_addr,
+				struct pvr_page_flags_raw flags)
+{
+	entry->val = PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
+		     PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
+		     (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
+		     flags.val.val;
+}
+
+static void
+pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
+{
+	entry->val = 0;
+}
+
+/**
+ * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
+ *                               table entry.
+ * @read_only: This page is read-only (see: Read Only).
+ * @cache_coherent: This page does not require cache flushes (see: Cache
+ *                  Coherency).
+ * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
+ * @pm_fw_protect: This page is only for use by the firmware or Parameter
+ *                 Manager (see PM/FW Protect).
+ *
+ * For more details on the use of these four options, see their respective
+ * entries in the table under &struct pvr_page_table_l0_entry_raw.
+ *
+ * Return:
+ * A new &struct pvr_page_flags_raw instance which can be passed directly to
+ * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
+ */
+static struct pvr_page_flags_raw
+pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
+			  bool pm_fw_protect)
+{
+	struct pvr_page_flags_raw flags;
+
+	flags.val.val =
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
+
+	return flags;
+}
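+
+/*
+ * Putting the two helpers together (illustrative only): writing a read-only,
+ * cache-coherent mapping of @dma_addr into a raw L0 slot would look like:
+ *
+ *   struct pvr_page_flags_raw flags =
+ *           pvr_page_flags_raw_create(true, true, false, false);
+ *
+ *   pvr_page_table_l0_entry_raw_set(entry, dma_addr, flags);
+ *
+ * In practice this is done through pvr_page_table_l0_insert() (see below) so
+ * that the mirror table's entry count stays in step with the raw table.
+ */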
+
+/**
+ * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l2_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l2_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l1_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l1_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ *
+ * .. caution::
+ *
+ *    The size of level 0 page tables is variable depending on the page size
+ *    specified in the associated level 1 page table entry. Since the device
+ *    page size in use is pegged to the host page size, it cannot vary at
+ *    runtime. This structure is therefore only defined to contain the required
+ *    number of entries for the current device page size. **You should never
+ *    read or write beyond the last supported entry.**
+ */
+struct pvr_page_table_l0_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l0_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
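+
+/*
+ * Size arithmetic (for reference): the static_asserts above imply
+ * ROGUE_MMUCTRL_ENTRIES_PC_VALUE == SZ_4K / sizeof(u32) == 1024 and
+ * ROGUE_MMUCTRL_ENTRIES_PD_VALUE == SZ_4K / sizeof(u64) == 512, since L2 and
+ * L1 tables must exactly fill one PVR_MMU_BACKING_PAGE_SIZE page. An L0
+ * table only has to fit within a backing page, because its entry count
+ * shrinks as the device page size grows (see the table in the
+ * &struct pvr_page_table_l1_entry_raw documentation above).
+ */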
+
+/**
+ * DOC: Mirror page tables
+ */
+
+/*
+ * We pre-declare these types because they cross-depend on pointers to each
+ * other.
+ */
+struct pvr_page_table_l1;
+struct pvr_page_table_l0;
+
+/**
+ * struct pvr_page_table_l2 - A wrapped level 2 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l2_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l2_get_entry_raw().
+ *
+ * A level 2 page table forms the root of the page table tree structure, so
+ * this type has no &parent or &parent_idx members.
+ */
+struct pvr_page_table_l2 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l1_raw.entries).
+	 */
+	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l2_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l2_init() - Initialize a level 2 page table.
+ * @table: Target level 2 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
+		       struct pvr_device *pvr_dev)
+{
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l2_fini() - Teardown a level 2 page table.
+ * @table: Target level 2 page table.
+ *
+ * It is an error to attempt to use @table after calling this function.
+ */
+static void
+pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
+ *                            device.
+ * @table: Target level 2 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l2_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 1 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l1_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
+ *                               page table.
+ * @table: Target level 2 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l2_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l2_raw *
+pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 2 page table.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 2 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 2 page table entry.
+ */
+static struct pvr_page_table_l2_entry_raw *
+pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
+{
+	return &pvr_page_table_l2_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
+{
+	struct pvr_page_table_l2_entry_raw entry_raw =
+		*pvr_page_table_l2_get_entry_raw(table, idx);
+
+	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l1 - A wrapped level 1 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l1_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l1_get_entry_raw().
+ */
+struct pvr_page_table_l1 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l0_raw.entries).
+	 */
+	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L1 page table is active. When the L1 page table
+		 * has been removed and queued for destruction, the next_free field
+		 * should be used instead.
+		 */
+		struct pvr_page_table_l2 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L1 page table to take/free.
+		 *
+		 * Used to form a linked list of L1 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l1 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l1_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l1_init() - Initialize a level 1 page table.
+ * @table: Target level 1 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l2_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l1_free() - Teardown a level 1 page table.
+ * @table: Target level 1 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l2_remove(), which must
+ * be called *before* pvr_page_table_l1_free().
+ */
+static void
+pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
+ *                            device.
+ * @table: Target level 1 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l1_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 0 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l0_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
+ *                               page table.
+ * @table: Target level 1 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l1_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l1_raw *
+pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 1 page table.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 1 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 1 page table entry.
+ */
+static struct pvr_page_table_l1_entry_raw *
+pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
+{
+	return &pvr_page_table_l1_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
+{
+	struct pvr_page_table_l1_entry_raw entry_raw =
+		*pvr_page_table_l1_get_entry_raw(table, idx);
+
+	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l0 - A wrapped level 0 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l0_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l0_get_entry_raw().
+ *
+ * There is no mirror representation of an individual page, so this type has no
+ * &entries member.
+ */
+struct pvr_page_table_l0 {
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L0 page table is active. When the L0 page table
+		 * has been removed and queued for destruction, the next_free field
+		 * should be used instead.
+		 */
+		struct pvr_page_table_l1 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L0 page table to take/free.
+		 *
+		 * Used to form a linked list of L0 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l0 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l0_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l0_init() - Initialize a level 0 page table.
+ * @table: Target level 0 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l1_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while initializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l0_free() - Teardown a level 0 page table.
+ * @table: Target level 0 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l1_remove(), which must
+ * be called *before* pvr_page_table_l0_free().
+ */
+static void
+pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
+ *                            device.
+ * @table: Target level 0 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l0_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child pages of @table also need to be flushed, this should be done first
+ * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
+ * this function.
+ */
+static void
+pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
+ *                               page table.
+ * @table: Target level 0 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l0_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l0_raw *
+pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 0 page table.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 0 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer. This is especially important for level 0 page tables, which
+ * can have a variable number of entries.
+ *
+ * Return:
+ * A pointer to the requested raw level 0 page table entry.
+ */
+static struct pvr_page_table_l0_entry_raw *
+pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
+{
+	return &pvr_page_table_l0_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the caller's responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
+{
+	struct pvr_page_table_l0_entry_raw entry_raw =
+		*pvr_page_table_l0_get_entry_raw(table, idx);
+
+	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_mmu_context - context holding data for operations at page
+ * catalogue level, intended for use with a VM context.
+ */
+struct pvr_mmu_context {
+	/** @pvr_dev: The PVR device associated with the owning VM context. */
+	struct pvr_device *pvr_dev;
+
+	/** @page_table_l2: The MMU table root. */
+	struct pvr_page_table_l2 page_table_l2;
+};
+
+enum pvr_mmu_sync_level {
+	PVR_MMU_SYNC_LEVEL_NONE = -1,
+	PVR_MMU_SYNC_LEVEL_0 = 0,
+	PVR_MMU_SYNC_LEVEL_1 = 1,
+	PVR_MMU_SYNC_LEVEL_2 = 2,
+};
+
+/**
+ * struct pvr_page_table_ptr - A reference to a single physical page as indexed
+ * by the page table structure.
+ *
+ * Intended for embedding in a &struct pvr_mmu_op_context.
+ */
+struct pvr_page_table_ptr {
+	/**
+	 * @l1_table: A cached handle to the level 1 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l1 *l1_table;
+
+	/**
+	 * @l0_table: A cached handle to the level 0 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l0 *l0_table;
+
+	/**
+	 * @l2_idx: Index into the level 2 page table the context is
+	 * currently referencing.
+	 */
+	u16 l2_idx;
+
+	/**
+	 * @l1_idx: Index into the level 1 page table the context is
+	 * currently referencing.
+	 */
+	u16 l1_idx;
+
+	/**
+	 * @l0_idx: Index into the level 0 page table the context is
+	 * currently referencing.
+	 */
+	u16 l0_idx;
+};
+
+/**
+ * struct pvr_mmu_op_context - context holding data for individual
+ * device-virtual mapping operations. Intended for use with a VM bind operation.
+ */
+struct pvr_mmu_op_context {
+	/** @mmu_ctx: The MMU context associated with the owning VM context. */
+	struct pvr_mmu_context *mmu_ctx;
+
+	/** @map: Data specifically for map operations. */
+	struct {
+		/**
+		 * @sgt: Scatter gather table containing pages pinned for use by
+		 * this context - these are currently pinned when initialising
+		 * the VM bind operation.
+		 */
+		struct sg_table *sgt;
+
+		/** @sgt_offset: Start address of the device-virtual mapping. */
+		u64 sgt_offset;
+	} map;
+
+	/**
+	 * @l1_free_tables: Preallocated l1 page table objects for use by this
+	 * context when creating a page mapping. Linked list created during
+	 * initialisation. Also used to collect page table objects freed by an
+	 * unmap.
+	 */
+	struct pvr_page_table_l1 *l1_free_tables;
+
+	/**
+	 * @l0_free_tables: Preallocated l0 page table objects for use by this
+	 * context when creating a page mapping. Linked list created during
+	 * initialisation. Also used to collect page table objects freed by an
+	 * unmap.
+	 */
+	struct pvr_page_table_l0 *l0_free_tables;
+
+	/**
+	 * @curr_page: A reference to a single physical page as indexed by
+	 * the page table structure.
+	 */
+	struct pvr_page_table_ptr curr_page;
+
+	/**
+	 * @sync_level_required: The maximum level of the page table tree
+	 * structure which has (possibly) been modified since it was last
+	 * flushed to the device.
+	 *
+	 * This field should only be set with pvr_mmu_op_context_require_sync()
+	 * or indirectly by pvr_mmu_op_context_sync_partial().
+	 */
+	enum pvr_mmu_sync_level sync_level_required;
+};
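+
+/*
+ * Free list usage (illustrative only): @l1_free_tables and @l0_free_tables
+ * are singly linked through each table's @next_free member. Taking a
+ * preallocated table is a pop from the head:
+ *
+ *   table = op_ctx->l0_free_tables;
+ *   op_ctx->l0_free_tables = table->next_free;
+ *
+ * and returning one (e.g. when an unmap empties it) is a push:
+ *
+ *   table->next_free = op_ctx->l0_free_tables;
+ *   op_ctx->l0_free_tables = table;
+ */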
+
+/**
+ * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
+ * table into a level 2 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
+ * table into.
+ * @child_table: Target level 1 page table to be referenced by the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ */
+static void
+pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
+			 struct pvr_page_table_l1 *child_table)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l2_idx);
+
+	pvr_page_table_l2_entry_raw_set(entry_raw,
+					child_table->backing_page.dma_addr);
+
+	child_table->parent = l2_table;
+	child_table->parent_idx = op_ctx->curr_page.l2_idx;
+	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
+	++l2_table->entry_count;
+	op_ctx->curr_page.l1_table = child_table;
+}
+
+/**
+ * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ */
+static void
+pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l1_table->parent_idx);
+
+	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
+
+	pvr_page_table_l2_entry_raw_clear(entry_raw);
+
+	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
+	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
+	op_ctx->curr_page.l1_table->next_free = op_ctx->l1_free_tables;
+	op_ctx->l1_free_tables = op_ctx->curr_page.l1_table;
+	op_ctx->curr_page.l1_table = NULL;
+
+	--l2_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
+ * table into a level 1 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
+ * table into.
+ * @child_table: L0 page table to insert.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ */
+static void
+pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
+			 struct pvr_page_table_l0 *child_table)
+{
+	struct pvr_page_table_l1_entry_raw *entry_raw =
+		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
+						op_ctx->curr_page.l1_idx);
+
+	pvr_page_table_l1_entry_raw_set(entry_raw,
+					child_table->backing_page.dma_addr);
+
+	child_table->parent = op_ctx->curr_page.l1_table;
+	child_table->parent_idx = op_ctx->curr_page.l1_idx;
+	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
+	++op_ctx->curr_page.l1_table->entry_count;
+	op_ctx->curr_page.l0_table = child_table;
+}
+
+/**
+ * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
+ *                              table.
+ * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
+ *
+ * If this function results in the L1 table becoming empty, it will be removed
+ * from its parent level 2 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ */
+static void
+pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l1_entry_raw *entry_raw =
+		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
+						op_ctx->curr_page.l0_table->parent_idx);
+
+	WARN_ON(op_ctx->curr_page.l0_table->parent !=
+		op_ctx->curr_page.l1_table);
+
+	pvr_page_table_l1_entry_raw_clear(entry_raw);
+
+	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
+	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
+	op_ctx->curr_page.l0_table->next_free = op_ctx->l0_free_tables;
+	op_ctx->l0_free_tables = op_ctx->curr_page.l0_table;
+	op_ctx->curr_page.l0_table = NULL;
+
+	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
+		/* Clear the parent L2 page table entry. */
+		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
+			pvr_page_table_l2_remove(op_ctx);
+	}
+}
+
+/**
+ * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
+ * into a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
+ * @dma_addr: Target DMA address to be referenced by the new entry.
+ * @flags: Page options to be stored in the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
+			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
+{
+	struct pvr_page_table_l0_entry_raw *entry_raw =
+		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+						op_ctx->curr_page.l0_idx);
+
+	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
+
+	/*
+	 * There is no entry to set here - we don't keep a mirror of
+	 * individual pages.
+	 */
+
+	++op_ctx->curr_page.l0_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
+ *
+ * If this function results in the L0 table becoming empty, it will be removed
+ * from its parent L1 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l0_entry_raw *entry_raw =
+		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+						op_ctx->curr_page.l0_idx);
+
+	pvr_page_table_l0_entry_raw_clear(entry_raw);
+
+	/*
+	 * There is no entry to clear here - we don't keep a mirror of
+	 * individual pages.
+	 */
+
+	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
+		/* Clear the parent L1 page table entry. */
+		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
+			pvr_page_table_l1_remove(op_ctx);
+	}
+}
+
+/**
+ * DOC: Page table index utilities
+ */
+
+/**
+ * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 2 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l2_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 1 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l1_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 0 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l0_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
+}
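+
+/*
+ * Example decomposition (illustrative only, assuming a 4KiB device page
+ * size): for a given device-virtual address, bits 11..0 are the offset
+ * within the page, pvr_page_table_l0_idx() selects the page within the 2MiB
+ * range covered by a single L1 entry, pvr_page_table_l1_idx() selects that
+ * L1 entry, and pvr_page_table_l2_idx() selects the L1 table itself via the
+ * page catalogue. The exact bit ranges come from the masks and shifts used
+ * above.
+ */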
+
+/**
+ * DOC: High-level page table operations
+ */
+
+/**
+ * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 1 page table from the specified level 2 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success, or
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 1 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 1 page table.
+ */
+static int
+pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+				bool should_insert)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l1 *table;
+	int err;
+
+	if (pvr_page_table_l2_entry_is_valid(l2_table,
+					     op_ctx->curr_page.l2_idx)) {
+		op_ctx->curr_page.l1_table =
+			l2_table->entries[op_ctx->curr_page.l2_idx];
+		return 0;
+	}
+
+	if (!should_insert)
+		return -ENXIO;
+
+	/* Take a prealloced table. */
+	table = op_ctx->l1_free_tables;
+	if (!table)
+		return -ENOMEM;
+
+	err = pvr_page_table_l1_init(table, op_ctx->mmu_ctx->pvr_dev);
+	if (err)
+		return err;
+
+	/* Pop */
+	op_ctx->l1_free_tables = table->next_free;
+	table->next_free = NULL;
+
+	pvr_page_table_l2_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 0 page table from the specified level 1 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success,
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 0 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 0 page table.
+ */
+static int
+pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+				bool should_insert)
+{
+	struct pvr_page_table_l0 *table;
+	int err;
+
+	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
+					     op_ctx->curr_page.l1_idx)) {
+		op_ctx->curr_page.l0_table =
+			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
+		return 0;
+	}
+
+	if (!should_insert)
+		return -ENXIO;
+
+	/* Take a prealloced table. */
+	table = op_ctx->l0_free_tables;
+	if (!table)
+		return -ENOMEM;
+
+	err = pvr_page_table_l0_init(table, op_ctx->mmu_ctx->pvr_dev);
+	if (err)
+		return err;
+
+	/* Pop */
+	op_ctx->l0_free_tables = table->next_free;
+	table->next_free = NULL;
+
+	pvr_page_table_l1_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_context_create() - Create an MMU context.
+ * @pvr_dev: PVR device associated with owning VM context.
+ *
+ * Returns:
+ *  * Newly created MMU context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
+{
+	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	int err;
+
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
+	if (err)
+		return ERR_PTR(err);
+
+	ctx->pvr_dev = pvr_dev;
+
+	return ctx;
+}
+
+/**
+ * pvr_mmu_context_destroy() - Destroy an MMU context.
+ * @ctx: Target MMU context.
+ */
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
+{
+	pvr_page_table_l2_fini(&ctx->page_table_l2);
+	kfree(ctx);
+}
+
+/**
+ * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
+ * page table structure behind a VM context.
+ * @root: Target MMU page table root.
+ */
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
+{
+	return ctx->page_table_l2.backing_page.dma_addr;
+}
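+
+/*
+ * Context lifecycle (a minimal sketch; error handling omitted). A VM context
+ * would typically create an MMU context, fetch the root table address for
+ * the device, and destroy the context again on teardown:
+ *
+ *   struct pvr_mmu_context *mmu_ctx = pvr_mmu_context_create(pvr_dev);
+ *   dma_addr_t root = pvr_mmu_get_root_table_dma_addr(mmu_ctx);
+ *
+ *   ...
+ *
+ *   pvr_mmu_context_destroy(mmu_ctx);
+ */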
+
+/**
+ * pvr_page_table_l1_alloc() - Allocate a l1 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l1_init().
+ */
+static struct pvr_page_table_l1 *
+pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
+{
+	int err;
+
+	struct pvr_page_table_l1 *table =
+		kzalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
+	if (err) {
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	return table;
+}
+
+/**
+ * pvr_page_table_l0_alloc() - Allocate a l0 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l0_init().
+ */
+static struct pvr_page_table_l0 *
+pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
+{
+	int err;
+
+	struct pvr_page_table_l0 *table =
+		kzalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
+	if (err) {
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	return table;
+}
+
+/**
+ * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
+ * sync operation for the referenced page tables up to a specified level.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level for which a sync is required.
+ */
+static void
+pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
+				enum pvr_mmu_sync_level level)
+{
+	if (op_ctx->sync_level_required < level)
+		op_ctx->sync_level_required = level;
+}
+
+/**
+ * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level to sync.
+ *
+ * Do not call this function directly. Instead use
+ * pvr_mmu_op_context_sync_partial() which is checked against the current
+ * value of &op_ctx->sync_level_required as set by
+ * pvr_mmu_op_context_require_sync().
+ */
+static void
+pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
+			       enum pvr_mmu_sync_level level)
+{
+	/*
+	 * We sync the page table levels in ascending order (starting from the
+	 * leaf node) to ensure consistency.
+	 */
+
+	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
+
+	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
+		return;
+
+	if (op_ctx->curr_page.l0_table)
+		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
+
+	if (level < PVR_MMU_SYNC_LEVEL_1)
+		return;
+
+	if (op_ctx->curr_page.l1_table)
+		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
+
+	if (level < PVR_MMU_SYNC_LEVEL_2)
+		return;
+
+	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
+}
+
+/**
+ * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Requested page table level to sync up to (inclusive).
+ *
+ * If @level is greater than the maximum level recorded by @op_ctx as requiring
+ * a sync operation, only the previously recorded maximum will be used.
+ *
+ * Additionally, if @level is greater than or equal to the maximum level
+ * recorded by @op_ctx as requiring a sync operation, that maximum level will be
+ * reset as a full sync will be performed. This is equivalent to calling
+ * pvr_mmu_op_context_sync().
+ */
+static void
+pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
+				enum pvr_mmu_sync_level level)
+{
+	/*
+	 * If the requested sync level is greater than or equal to the
+	 * currently required sync level, we do two things:
+	 *  * Don't waste time syncing levels we haven't previously marked as
+	 *    requiring a sync, and
+	 *  * Reset the required sync level since we are about to sync
+	 *    everything that was previously marked as requiring a sync.
+	 */
+	if (level >= op_ctx->sync_level_required) {
+		level = op_ctx->sync_level_required;
+		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+	}
+
+	pvr_mmu_op_context_sync_manual(op_ctx, level);
+}
+
+/**
+ * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
+ * a MMU op context.
+ * @op_ctx: Target MMU op context.
+ *
+ * The maximum level marked internally as requiring a sync will be reset so
+ * that subsequent calls to this function will be no-ops unless @op_ctx is
+ * otherwise updated.
+ */
+static void
+pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
+{
+	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
+
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+}
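+
+/*
+ * Illustrative sequence (not additional driver code) showing how the sync
+ * helpers above interact: after
+ *
+ *   pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
+ *   pvr_mmu_op_context_sync_partial(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+ *
+ * only the L0 table is synced and the recorded requirement stays at level 1.
+ * A later pvr_mmu_op_context_sync(op_ctx) then syncs up to level 1 and resets
+ * the requirement to PVR_MMU_SYNC_LEVEL_NONE.
+ */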
+
+/**
+ * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
+ * the page table tree structure needed to reference the physical page
+ * referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specifies whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ * @load_level_required: Maximum page table level to load.
+ *
+ * If @should_create is %true, this function may modify the stored required
+ * sync level of @op_ctx as new page tables are created and inserted into their
+ * respective parents.
+ *
+ * Since there is only one root page table, it is technically incorrect to call
+ * this function with a value of @load_level_required greater than or equal to
+ * the root level number. However, this is not explicitly disallowed here.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_page_table_l1_get_or_insert() if
+ *    @load_level_required >= 1 except -%ENXIO, or
+ *  * Any error returned by pvr_page_table_l0_get_or_insert() if
+ *    @load_level_required >= 0 except -%ENXIO.
+ */
+static int
+pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
+			       bool should_create,
+			       enum pvr_mmu_sync_level load_level_required)
+{
+	const struct pvr_page_table_l1 *l1_head_before = op_ctx->l1_free_tables;
+	const struct pvr_page_table_l0 *l0_head_before = op_ctx->l0_free_tables;
+	int err;
+
+	/* Clear tables we're about to fetch in case of error states. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
+		op_ctx->curr_page.l1_table = NULL;
+
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
+		op_ctx->curr_page.l0_table = NULL;
+
+	/* Get or create L1 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
+		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L1 page table was
+			 * found, return early but without an error. Since
			 * pvr_page_table_l1_get_or_insert() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			return err;
+		}
+	}
+
+	/* Get or create L0 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
+		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L0 page table was
+			 * found, return early but without an error. Since
+			 * pvr_page_table_l0_get_or_insert() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			/*
+			 * At this point, an L1 page table could have been
+			 * inserted but is now empty due to the failed attempt
+			 * at inserting an L0 page table. In this instance, we
+			 * must remove the empty L1 page table ourselves as
+			 * pvr_page_table_l1_remove() is never called as part
+			 * of the error path in
+			 * pvr_page_table_l0_get_or_insert().
+			 */
+			if (l1_head_before != op_ctx->l1_free_tables) {
+				pvr_page_table_l2_remove(op_ctx);
+				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+			}
+
+			return err;
+		}
+	}
+
+	/*
+	 * A sync is only needed if table objects were inserted. This can be
+	 * inferred by checking if the pointer at the head of the linked list
+	 * has changed.
+	 */
+	if (l1_head_before != op_ctx->l1_free_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+	else if (l0_head_before != op_ctx->l0_free_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
+ * context, syncing any page tables previously assigned to it which are no
+ * longer relevant.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: New pointer target.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * This function performs a full sync on the pointer, regardless of which
+ * levels are modified.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_op_context_load_tables().
+ */
+static int
+pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
+				 u64 device_addr, bool should_create)
+{
+	pvr_mmu_op_context_sync(op_ctx);
+
+	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
+	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
+	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
+	op_ctx->curr_page.l1_table = NULL;
+	op_ctx->curr_page.l0_table = NULL;
+
+	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+					      PVR_MMU_SYNC_LEVEL_1);
+}
+
+/**
+ * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
+ * context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * If @should_create is %false, it is the caller's responsibility to verify that
+ * the state of the table references in @op_ctx is valid on return. If -%ENXIO
+ * is returned, at least one of the table references is invalid. It should be
+ * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
+ * returned, unlike other error codes. The caller should check which references
+ * are invalid by comparing them to %NULL. Only the L2 page table is guaranteed
+ * to be valid, since it represents the root of the page table tree structure
+ * and is owned by the MMU context rather than the op context.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EPERM if the operation would wrap at the top of the page table
+ *    hierarchy,
+ *  * -%ENXIO if @should_create is %false and a page table of any level would
+ *    have otherwise been created, or
+ *  * Any error returned while attempting to create missing page tables if
+ *    @should_create is %true.
+ */
+static int
+pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
+			     bool should_create)
+{
+	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
+		goto load_tables;
+
+	op_ctx->curr_page.l0_idx = 0;
+	load_level_required = PVR_MMU_SYNC_LEVEL_0;
+
+	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
+		goto load_tables;
+
+	op_ctx->curr_page.l1_idx = 0;
+	load_level_required = PVR_MMU_SYNC_LEVEL_1;
+
+	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
+		goto load_tables;
+
+	/*
+	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
+	 * zero here. However, that would wrap the top layer of the page table
+	 * hierarchy which is not a valid operation. Instead, we warn and return
+	 * an error.
+	 */
+	WARN(true,
+	     "%s(%p) attempted to loop the top of the page table hierarchy",
+	     __func__, op_ctx);
+	return -EPERM;
+
+	/* If indices have wrapped, we need to load new tables. */
+load_tables:
+	/* First, flush tables which will be unloaded. */
+	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
+
+	/* Then load tables from the required level down. */
+	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+					      load_level_required);
+}
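+
+/*
+ * Illustrative walk-through of the wrap handling above (no new behaviour):
+ * when curr_page.l0_idx was the last entry of its L0 table, the increment
+ * wraps it to zero and bumps curr_page.l1_idx. The outgoing L0 table is then
+ * flushed via pvr_mmu_op_context_sync_partial(op_ctx, PVR_MMU_SYNC_LEVEL_0)
+ * and the L0 table under the new index is looked up (or created, when
+ * @should_create is true) by pvr_mmu_op_context_load_tables().
+ */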
+
+/**
+ * DOC: Single page operations
+ */
+
+/**
+ * pvr_page_create() - Create a device-virtual memory page and insert it into
+ * a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the device-virtual address of the
+ * target page.
+ * @dma_addr: DMA address of the physical page backing the created page.
+ * @flags: Page options saved on the level 0 page table entry for reading by
+ *         the device.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EEXIST if the requested page already exists.
+ */
+static int
+pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
+		struct pvr_page_flags_raw flags)
+{
+	/* Do not create a new page if one already exists. */
+	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+					     op_ctx->curr_page.l0_idx)) {
+		return -EEXIST;
+	}
+
+	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
+
+	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+	return 0;
+}
+
+/**
+ * pvr_page_destroy() - Destroy a device page after removing it from its
+ * parent level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the device-virtual address of the
+ * target page.
+ */
+static void
+pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+	/* Do nothing if the page does not exist. */
+	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+					      op_ctx->curr_page.l0_idx)) {
+		return;
+	}
+
+	/* Clear the parent L0 page table entry. */
+	pvr_page_table_l0_remove(op_ctx);
+
+	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+}
+
+/**
+ * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
+ * @op_ctx: Target MMU op context.
+ */
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+	const bool flush_caches =
+		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
+
+	pvr_mmu_op_context_sync(op_ctx);
+
+	if (flush_caches)
+		WARN_ON(pvr_mmu_flush(op_ctx->mmu_ctx->pvr_dev));
+
+	while (op_ctx->l0_free_tables) {
+		struct pvr_page_table_l0 *tmp = op_ctx->l0_free_tables;
+
+		op_ctx->l0_free_tables = op_ctx->l0_free_tables->next_free;
+		pvr_page_table_l0_free(tmp);
+	}
+
+	while (op_ctx->l1_free_tables) {
+		struct pvr_page_table_l1 *tmp = op_ctx->l1_free_tables;
+
+		op_ctx->l1_free_tables = op_ctx->l1_free_tables->next_free;
+		pvr_page_table_l1_free(tmp);
+	}
+
+	kfree(op_ctx);
+}
+
+/**
+ * pvr_mmu_op_context_create() - Create an MMU op context.
+ * @ctx: MMU context associated with owning VM context.
+ * @sgt: Scatter gather table containing pages pinned for use by this context.
+ * @sgt_offset: Start offset of the requested device-virtual memory mapping.
+ * @size: Size in bytes of the requested device-virtual memory mapping. For an
+ * unmapping, this should be zero so that no page tables are allocated.
+ *
+ * Returns:
+ *  * Newly created MMU op context object on success,
+ *  * -%ENOMEM if no memory is available, or
+ *  * Any error code returned by pvr_page_table_l1_alloc() or
+ *    pvr_page_table_l0_alloc().
+ */
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
+			  u64 sgt_offset, u64 size)
+{
+	int err;
+
+	struct pvr_mmu_op_context *op_ctx =
+		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
+
+	if (!op_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	op_ctx->mmu_ctx = ctx;
+	op_ctx->map.sgt = sgt;
+	op_ctx->map.sgt_offset = sgt_offset;
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+	if (size) {
+		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
+		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
+		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
+		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
+		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
+		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
+
+		/*
+		 * Alloc and push page table entries until we have enough of
+		 * each type, ending with linked lists of l0 and l1 entries in
+		 * reverse order.
+		 */
+		for (int i = 0; i < l1_count; i++) {
+			struct pvr_page_table_l1 *l1_tmp =
+				pvr_page_table_l1_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l1_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l1_tmp->next_free = op_ctx->l1_free_tables;
+			op_ctx->l1_free_tables = l1_tmp;
+		}
+
+		for (int i = 0; i < l0_count; i++) {
+			struct pvr_page_table_l0 *l0_tmp =
+				pvr_page_table_l0_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l0_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l0_tmp->next_free = op_ctx->l0_free_tables;
+			op_ctx->l0_free_tables = l0_tmp;
+		}
+	}
+
+	return op_ctx;
+
+err_cleanup:
+	pvr_mmu_op_context_destroy(op_ctx);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
+ * starting from the current page of an MMU op context.
+ * @op_ctx: Target MMU op context pointing at the first page to unmap.
+ * @nr_pages: Number of pages to unmap.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page() (except -%ENXIO).
+ */
+static int
+pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
+				   u64 nr_pages)
+{
+	int err;
+
+	if (nr_pages == 0)
+		return 0;
+
+	/*
+	 * Destroy first page outside loop, as it doesn't require a page
+	 * advance beforehand. If the L0 page table reference in
+	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
+	 * @op_ctx.curr_page (so skip ahead).
+	 */
+	if (op_ctx->curr_page.l0_table)
+		pvr_page_destroy(op_ctx);
+
+	for (u64 page = 1; page < nr_pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, false);
+		/*
+		 * If the page table tree structure at @op_ctx.curr_page is
+		 * incomplete, skip ahead. We don't care about unmapping pages
+		 * that cannot exist.
+		 *
+		 * FIXME: This could be made more efficient by jumping ahead
+		 * using pvr_mmu_op_context_set_curr_page().
+		 */
+		if (err == -ENXIO)
+			continue;
+		else if (err)
+			return err;
+
+		pvr_page_destroy(op_ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_unmap() - Unmap pages from a memory context.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: First device-virtual address to unmap.
+ * @size: Size in bytes to unmap.
+ *
+ * The total amount of device-virtual memory unmapped is @size rounded down to
+ * a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
+ *  * Any error code returned by pvr_mmu_op_context_unmap_curr_page().
+ */
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
+{
+	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
+
+	if (err)
+		return err;
+
+	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
+						  size >> PVR_DEVICE_PAGE_SHIFT);
+}
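+
+/*
+ * A minimal usage sketch of the op context API (illustrative only; error
+ * handling elided), mirroring how pvr_vm.c uses it later in this patch:
+ *
+ *   op_ctx = pvr_mmu_op_context_create(mmu_ctx, sgt, sgt_offset, size);
+ *   err = pvr_mmu_map(op_ctx, size, flags, device_addr);
+ *   pvr_mmu_op_context_destroy(op_ctx);
+ *
+ * An unmap uses its own op context, created with a NULL scatter-gather table
+ * and zero size, followed by pvr_mmu_unmap() over the same range.
+ */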
+
+/**
+ * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
+ * device-virtual memory.
+ * @op_ctx: Target MMU op context pointing to the first page that should be
+ * mapped.
+ * @sgl: Target scatter-gather table entry.
+ * @offset: Offset into @sgl to map from. Must result in a starting address
+ * from @sgl which is CPU page-aligned.
+ * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @page_flags: Page options to be applied to every device-virtual memory page
+ * in the created mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if the range specified by @offset and @size is not completely
+ *    within @sgl, or
+ *  * Any error encountered while creating a page with pvr_page_create(), or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page().
+ */
+static int
+pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
+		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
+{
+	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
+	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
+	const unsigned int dma_len = sg_dma_len(sgl);
+	struct pvr_page_table_ptr ptr_copy;
+	unsigned int page;
+	int err;
+
+	if (size > dma_len || offset > dma_len - size)
+		return -EINVAL;
+
+	/*
+	 * Before progressing, save a copy of the start pointer so we can use
+	 * it again if we enter an error state and have to destroy pages.
+	 */
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	/*
+	 * Create first page outside loop, as it doesn't require a page advance
+	 * beforehand.
+	 */
+	err = pvr_page_create(op_ctx, dma_addr, page_flags);
+	if (err)
+		return err;
+
+	for (page = 1; page < pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, true);
+		if (err)
+			goto err_destroy_pages;
+
+		dma_addr += PVR_DEVICE_PAGE_SIZE;
+
+		err = pvr_page_create(op_ctx, dma_addr, page_flags);
+		if (err)
+			goto err_destroy_pages;
+	}
+
+	return 0;
+
+err_destroy_pages:
+	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
+
+	return err;
+}
+
+/**
+ * pvr_mmu_map() - Map an object's virtual memory to physical memory.
+ * @op_ctx: Target MMU op context.
+ * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @flags: Flags from pvr_gem_object associated with the mapping.
+ * @device_addr: Virtual device address to map to. Must be device page-aligned.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if @size or the scatter-gather table offset is not device
+ *    page-aligned, or
+ *  * Any error code returned by pvr_mmu_op_context_set_curr_page(), or
+ *  * Any error code returned by pvr_mmu_map_sgl(), or
+ *  * Any error code returned by pvr_mmu_op_context_next_page().
+ */
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr)
+{
+	struct pvr_page_table_ptr ptr_copy;
+	struct pvr_page_flags_raw flags_raw;
+	struct scatterlist *sgl;
+	u64 mapped_size = 0;
+	unsigned int count;
+	int err;
+
+	if (!size)
+		return 0;
+
+	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
+		return -EINVAL;
+
+	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
+	if (err)
		return err;
+
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	flags_raw = pvr_page_flags_raw_create(false, false,
+					      flags & DRM_PVR_BO_DEVICE_BYPASS_CACHE,
+					      flags & DRM_PVR_BO_DEVICE_PM_FW_PROTECT);
+
+	/* Map scatter gather table */
+	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
+		const size_t sgl_len = sg_dma_len(sgl);
+		u64 sgl_offset, map_sgl_len;
+
+		if (sgl_len <= op_ctx->map.sgt_offset) {
+			op_ctx->map.sgt_offset -= sgl_len;
+			continue;
+		}
+
+		sgl_offset = op_ctx->map.sgt_offset;
+		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
+
+		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
+				      flags_raw);
+		if (err)
+			break;
+
+		/*
+		 * Flag the L0 page table as requiring a flush when the MMU op
+		 * context is destroyed.
+		 */
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+		op_ctx->map.sgt_offset = 0;
+		mapped_size += map_sgl_len;
+
+		if (mapped_size >= size)
+			break;
+
+		err = pvr_mmu_op_context_next_page(op_ctx, true);
+		if (err)
+			break;
+	}
+
+	if (err && mapped_size) {
+		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+		pvr_mmu_op_context_unmap_curr_page(op_ctx,
+						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
+	}
+
+	return err;
+}
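+
+/*
+ * Illustrative example of how the scatter-gather walk above consumes
+ * map.sgt_offset: with an offset of 20 KiB and a first entry of 16 KiB, the
+ * first entry is skipped entirely (the offset is reduced to 4 KiB) and mapping
+ * starts 4 KiB into the second entry, continuing until @size bytes have been
+ * mapped.
+ */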
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.h b/drivers/gpu/drm/imagination/pvr_mmu.h
new file mode 100644
index 000000000000..bf93c5ffc86a
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_mmu.h
@@ -0,0 +1,108 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_MMU_H
+#define PVR_MMU_H
+
+#include <linux/memory.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+
+/* Forward declaration from "pvr_mmu.c" */
+struct pvr_mmu_context;
+struct pvr_mmu_op_context;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <linux/scatterlist.h> */
+struct sg_table;
+
+/**
+ * DOC: Public API (constants)
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SIZE
+ *
+ *    Fixed page size referenced by leaf nodes in the page table tree
+ *    structure. In the current implementation, this value is pegged to the
+ *    CPU page size (%PAGE_SIZE). It is therefore an error to specify a CPU
+ *    page size which is not also a supported device page size. The supported
+ *    device page sizes are: 4KiB, 16KiB, 64KiB, 256KiB, 1MiB and 2MiB.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SHIFT
+ *
+ *    Shift value used to efficiently multiply or divide by
+ *    %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_MASK
+ *
+ *    Mask used to round a value down to the nearest multiple of
+ *    %PVR_DEVICE_PAGE_SIZE. When bitwise negated, it will indicate whether a
+ *    value is already a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ */
+
+/* PVR_DEVICE_PAGE_SIZE determines the page size */
+#define PVR_DEVICE_PAGE_SIZE (PAGE_SIZE)
+#define PVR_DEVICE_PAGE_SHIFT (PAGE_SHIFT)
+#define PVR_DEVICE_PAGE_MASK (PAGE_MASK)
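+
+/*
+ * Example uses of the macros above (illustrative only): with the device page
+ * size pegged to the CPU page size, "size >> PVR_DEVICE_PAGE_SHIFT" yields the
+ * number of device pages covering a page-multiple size, and
+ * "(addr & ~PVR_DEVICE_PAGE_MASK) == 0" tests device page alignment, as done
+ * by pvr_mmu_map() and pvr_device_addr_is_valid().
+ */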
+
+/**
+ * DOC: Page table index utilities (constants)
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_SPACE_SIZE
+ *
+ *    Size of device-virtual address space which can be represented in the page
+ *    table structure.
+ *
+ *    This value is checked at runtime against
+ *    &pvr_device_features.virtual_address_space_bits by
+ *    pvr_vm_create_context(), which will return an error if the feature value
+ *    does not match this constant.
+ *
+ *    .. admonition:: Future work
+ *
+ *       It should be possible to support other values of
+ *       &pvr_device_features.virtual_address_space_bits, but so far no
+ *       hardware has been created which advertises an unsupported value.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_BITS
+ *
+ *    Number of bits needed to represent any value less than
+ *    %PVR_PAGE_TABLE_ADDR_SPACE_SIZE exactly.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_MASK
+ *
+ *    Bitmask of device-virtual addresses which are valid in the page table
+ *    structure.
+ *
+ *    This value is derived from %PVR_PAGE_TABLE_ADDR_SPACE_SIZE, so the same
+ *    notes on that constant apply here.
+ */
+#define PVR_PAGE_TABLE_ADDR_SPACE_SIZE SZ_1T
+#define PVR_PAGE_TABLE_ADDR_BITS __ffs(PVR_PAGE_TABLE_ADDR_SPACE_SIZE)
+#define PVR_PAGE_TABLE_ADDR_MASK (PVR_PAGE_TABLE_ADDR_SPACE_SIZE - 1)
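+
+/*
+ * For reference (illustrative expansion, not additional definitions): with
+ * PVR_PAGE_TABLE_ADDR_SPACE_SIZE set to SZ_1T, PVR_PAGE_TABLE_ADDR_BITS
+ * evaluates to 40 and PVR_PAGE_TABLE_ADDR_MASK to 0xffffffffff (40 set bits).
+ */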
+
+int pvr_mmu_flush(struct pvr_device *pvr_dev);
+
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev);
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx);
+
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx);
+
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx);
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx,
+			  struct sg_table *sgt, u64 sgt_offset, u64 size);
+
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr);
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size);
+
+#endif /* PVR_MMU_H */
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
new file mode 100644
index 000000000000..616fad3a3325
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -0,0 +1,890 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_vm.h"
+
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_heap_config.h"
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuva_mgr.h>
+
+#include <linux/container_of.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp_types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/stddef.h>
+
+/**
+ * DOC: Memory context
+ *
+ * This is the "top level" datatype in the VM code. It's exposed in the public
+ * API as an opaque handle.
+ */
+
+/**
+ * struct pvr_vm_context - Context type which encapsulates an entire page table
+ * tree structure.
+ * @pvr_dev: The PowerVR device to which this context is bound. This binding is
+ * immutable for the life of the context.
+ * @mmu_ctx: The context for binding to physical memory.
+ * @gpuva_mgr: GPUVA manager object associated with this context.
+ * @lock: Global lock on this entire structure of page tables.
+ * @fw_mem_ctx_obj: Firmware object representing firmware memory context.
+ * @ref_count: Reference count of object.
+ */
+struct pvr_vm_context {
+	struct pvr_device *pvr_dev;
+	struct pvr_mmu_context *mmu_ctx;
+	struct drm_gpuva_manager gpuva_mgr;
+	struct mutex lock;
+	struct pvr_fw_object *fw_mem_ctx_obj;
+	struct kref ref_count;
+};
+
+/**
+ * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
+ *                                     page table structure behind a VM context.
+ * @vm_ctx: Target VM context.
+ */
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
+{
+	return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
+}
+
+/**
+ * DOC: Memory mappings
+ */
+
+/**
+ * pvr_vm_gpuva_mapping_init() - Setup a mapping object with the specified
+ * parameters ready for mapping using pvr_vm_gpuva_mapping_map().
+ * @va: Pointer to drm_gpuva mapping object.
+ * @device_addr: Device-virtual address at the start of the mapping.
+ * @size: Size of the desired mapping.
+ * @pvr_obj: Target PowerVR memory object.
+ * @pvr_obj_offset: Offset into @pvr_obj to begin mapping from.
+ *
+ * Some parameters of this function are unchecked. It is therefore the caller's
+ * responsibility to ensure certain constraints are met. Specifically:
+ *
+ * * @pvr_obj_offset must be less than the size of @pvr_obj,
+ * * The sum of @pvr_obj_offset and @size must be less than or equal to the
+ *   size of @pvr_obj,
+ * * The range specified by @pvr_obj_offset and @size (the "CPU range") must be
+ *   CPU page-aligned both in start position and size, and
+ * * The range specified by @device_addr and @size (the "device range") must be
+ *   device page-aligned both in start position and size.
+ *
+ * Furthermore, it is up to the caller to make sure that a reference to @pvr_obj
+ * is taken prior to mapping @va with the drm_gpuva_manager.
+ */
+static void
+pvr_vm_gpuva_mapping_init(struct drm_gpuva *va, u64 device_addr, u64 size,
+			  struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset)
+{
+	va->va.addr = device_addr;
+	va->va.range = size;
+	va->gem.obj = gem_from_pvr_gem(pvr_obj);
+	va->gem.offset = pvr_obj_offset;
+}
+
+struct pvr_vm_gpuva_op_ctx {
+	struct pvr_vm_context *vm_ctx;
+	struct pvr_mmu_op_context *mmu_op_ctx;
+	struct drm_gpuva *new_va, *prev_va, *next_va;
+};
+
+/**
+ * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
+ * @op: gpuva op containing the mapping details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuva_sm_map following a successful mapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_map().
+ */
+static int
+pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
+	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
+	int err;
+
+	if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
+		return -EINVAL;
+
+	err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
+			  op->map.va.addr);
+	if (err)
+		return err;
+
+	pvr_vm_gpuva_mapping_init(ctx->new_va, op->map.va.addr,
+				  op->map.va.range, pvr_gem, op->map.gem.offset);
+
+	drm_gpuva_map(&ctx->vm_ctx->gpuva_mgr, ctx->new_va, &op->map);
+	drm_gpuva_link(ctx->new_va);
+	ctx->new_va = NULL;
+
+	/*
+	 * Increment the refcount on the underlying physical memory resource
+	 * to prevent de-allocation while the mapping exists.
+	 */
+	pvr_gem_object_get(pvr_gem);
+
+	return 0;
+}
+
+/**
+ * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
+ * @op: gpuva op containing the unmap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuva_sm_unmap following a successful unmapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_unmap().
+ */
+static int
+pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->unmap.va->gem.obj);
+	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
+
+	int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
+				op->unmap.va->va.range);
+
+	if (err)
+		return err;
+
+	drm_gpuva_unmap(&op->unmap);
+	drm_gpuva_unlink(op->unmap.va);
+	kfree(op->unmap.va);
+
+	pvr_gem_object_put(pvr_gem);
+
+	return 0;
+}
+
+/**
+ * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
+ * @op: gpuva op containing the remap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by either drm_gpuva_sm_map or drm_gpuva_sm_unmap when a
+ * mapping or unmapping operation causes a region to be split. The
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_unmap().
+ */
+static int
+pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_vm_gpuva_op_ctx *ctx = op_ctx;
+
+	if (op->remap.unmap) {
+		const u64 va_start = op->remap.prev ?
+				     op->remap.prev->va.addr + op->remap.prev->va.range :
+				     op->remap.unmap->va->va.addr;
+		const u64 va_end = op->remap.next ?
+				   op->remap.next->va.addr :
+				   op->remap.unmap->va->va.addr + op->remap.unmap->va->va.range;
+
+		int err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start,
+					va_end - va_start);
+
+		if (err)
+			return err;
+	}
+
+	if (op->remap.prev)
+		pvr_vm_gpuva_mapping_init(ctx->prev_va, op->remap.prev->va.addr,
+					  op->remap.prev->va.range,
+					  gem_to_pvr_gem(op->remap.prev->gem.obj),
+					  op->remap.prev->gem.offset);
+
+	if (op->remap.next)
+		pvr_vm_gpuva_mapping_init(ctx->next_va, op->remap.next->va.addr,
+					  op->remap.next->va.range,
+					  gem_to_pvr_gem(op->remap.next->gem.obj),
+					  op->remap.next->gem.offset);
+
+	/* No actual remap required: the page table tree depth is fixed to 3,
+	 * and we use 4k page table entries only for now.
+	 */
+	drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap);
+
+	if (op->remap.prev) {
+		pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->gem.obj));
+		drm_gpuva_link(ctx->prev_va);
+		ctx->prev_va = NULL;
+	}
+
+	if (op->remap.next) {
+		pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->gem.obj));
+		drm_gpuva_link(ctx->next_va);
+		ctx->next_va = NULL;
+	}
+
+	if (op->remap.unmap) {
+		struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->remap.unmap->va->gem.obj);
+
		drm_gpuva_unlink(op->remap.unmap->va);
		kfree(op->remap.unmap->va);
+
+		pvr_gem_object_put(pvr_gem);
+	}
+
+	return 0;
+}
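+
+/*
+ * Illustrative example of a remap (no new behaviour): unmapping the middle of
+ * an existing mapping makes drm_gpuva_sm_unmap() emit a remap op whose prev
+ * and next entries cover the surviving head and tail. The helper above then
+ * unmaps only the hole between the end of prev and the start of next, and
+ * re-links the two remnants against the same GEM object.
+ */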
+
+/*
+ * Public API
+ *
+ * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
+ */
+
+/**
+ * pvr_device_addr_is_valid() - Tests whether a device-virtual address
+ *                              is valid.
+ * @device_addr: Virtual device address to test.
+ *
+ * Return:
+ *  * %true if @device_addr is within the valid range for a device page
+ *    table and is aligned to the device page size, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_is_valid(u64 device_addr)
+{
+	return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
+	       (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
+}
+
+/**
+ * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
+ * address and associated size are both valid.
+ * @device_addr: Virtual device address to test.
+ * @size: Size of the range based at @device_addr to test.
+ *
+ * Calling pvr_device_addr_is_valid() twice (once on @device_addr, and again on
+ * @device_addr + @size) to verify a device-virtual address range initially
+ * seems intuitive, but it produces a false-negative when the address range
+ * is right at the end of device-virtual address space.
+ *
+ * This function catches that corner case, as well as checking that
+ * @size is non-zero.
+ *
+ * Return:
+ *  * %true if @device_addr is device page aligned; @size is device page
+ *    aligned; the range specified by @device_addr and @size is within the
+ *    bounds of the device-virtual address space, and @size is non-zero, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
+{
+	return pvr_device_addr_is_valid(device_addr) &&
+	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
+	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
+}
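+
+/*
+ * Worked example of the corner case described above (illustrative values,
+ * assuming a 1 TiB address space and 4 KiB device pages): device_addr =
+ * SZ_1T - SZ_4K with size = SZ_4K is a valid range, yet device_addr + size ==
+ * SZ_1T would fail pvr_device_addr_is_valid(). Comparing against
+ * PVR_PAGE_TABLE_ADDR_SPACE_SIZE with "<=" avoids that false-negative.
+ */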
+
+static const struct drm_gpuva_fn_ops pvr_vm_gpuva_ops = {
+	.sm_step_map = pvr_vm_gpuva_map,
+	.sm_step_remap = pvr_vm_gpuva_remap,
+	.sm_step_unmap = pvr_vm_gpuva_unmap,
+};
+
+/**
+ * pvr_vm_create_context() - Create a new VM context.
+ * @pvr_dev: Target PowerVR device.
+ * @is_userspace_context: %true if this context is for userspace. This will
+ *                        create a firmware memory context for the VM context
+ *                        and disable warnings when tearing down mappings.
+ *
+ * Return:
+ *  * A handle to the newly-minted VM context on success,
+ *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
+ *    missing or has an unsupported value,
+ *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
+ *    or
+ *  * Any error encountered while setting up internal structures.
+ */
+struct pvr_vm_context *
+pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+
+	struct pvr_vm_context *vm_ctx;
+	u16 device_addr_bits;
+
+	int err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
+				&device_addr_bits);
+	if (err) {
+		drm_err(drm_dev,
+			"Failed to get device virtual address space bits\n");
+		return ERR_PTR(err);
+	}
+
+	if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
+		drm_err(drm_dev,
+			"Device has unsupported virtual address space size\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
+	if (!vm_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	vm_ctx->pvr_dev = pvr_dev;
+	kref_init(&vm_ctx->ref_count);
+	mutex_init(&vm_ctx->lock);
+
+	drm_gpuva_manager_init(&vm_ctx->gpuva_mgr,
+			       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
+			       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
+
+	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
	err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx);
+	if (err) {
+		vm_ctx->mmu_ctx = NULL;
+		goto err_put_ctx;
+	}
+
+	if (is_userspace_context) {
+		/* TODO: Create FW mem context */
+		err = -ENODEV;
+		goto err_put_ctx;
+	}
+
+	return vm_ctx;
+
+err_put_ctx:
+	pvr_vm_context_put(vm_ctx);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_vm_context_release() - Teardown a VM context.
+ * @ref_count: Pointer to reference counter of the VM context.
+ *
+ * This function ensures that no mappings are left dangling by unmapping them
+ * all in order of ascending device-virtual address.
+ */
+static void
+pvr_vm_context_release(struct kref *ref_count)
+{
+	struct pvr_vm_context *vm_ctx =
+		container_of(ref_count, struct pvr_vm_context, ref_count);
+
+	/* TODO: Destroy FW mem context */
+	WARN_ON(vm_ctx->fw_mem_ctx_obj);
+
+	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuva_mgr.mm_start,
+			     vm_ctx->gpuva_mgr.mm_range));
+
+	drm_gpuva_manager_destroy(&vm_ctx->gpuva_mgr);
+	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
+	mutex_destroy(&vm_ctx->lock);
+
+	kfree(vm_ctx);
+}
+
+/**
+ * pvr_vm_context_lookup() - Look up VM context from handle
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on VM context object. Call pvr_vm_context_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a VM context)
+ */
+struct pvr_vm_context *
+pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_vm_context *vm_ctx;
+
+	xa_lock(&pvr_file->vm_ctx_handles);
+	vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
+	if (vm_ctx)
+		kref_get(&vm_ctx->ref_count);
+
+	xa_unlock(&pvr_file->vm_ctx_handles);
+
+	return vm_ctx;
+}
+
+/**
+ * pvr_vm_context_put() - Release a reference on a VM context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * %true if the VM context was destroyed, or
+ *  * %false if there are any references still remaining.
+ */
+bool
+pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
+{
+	WARN_ON(!vm_ctx);
+
+	if (vm_ctx)
+		return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
+
+	return true;
+}
+
+/**
+ * pvr_destroy_vm_contexts_for_file() - Destroy any VM contexts associated with the
+ * given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all vm_contexts associated with @pvr_file from the device VM context
+ * list and drops initial references. vm_contexts will then be destroyed once
+ * all outstanding references are dropped.
+ */
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_vm_context *vm_ctx;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
+		/* vm_ctx is not used here because that would create a race with xa_erase */
+		pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
+	}
+}
+
+/**
+ * pvr_vm_map() - Map a section of physical memory into a section of device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @pvr_obj: Target PowerVR memory object.
+ * @pvr_obj_offset: Offset into @pvr_obj to map from.
+ * @device_addr: Virtual device address at the start of the requested mapping.
+ * @size: Size of the requested mapping.
+ *
+ * No handle is returned to represent the mapping. Instead, callers should
+ * remember @device_addr and use that as a handle.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address; the region specified by @pvr_obj_offset and @size does not fall
+ *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
+ *    is not device-virtual page-aligned,
+ *  * Any error encountered while performing internal operations required to
+ *    create the mapping (returned from pvr_vm_gpuva_map or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_map(struct pvr_vm_context *vm_ctx,
+	   struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
+	   u64 device_addr, u64 size)
+{
+	const size_t pvr_obj_size = pvr_gem_object_size(pvr_obj);
+	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
+	struct sg_table *sgt;
+	int err;
+
+	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
+	    pvr_obj_offset & ~PAGE_MASK || size & ~PAGE_MASK ||
+	    pvr_obj_offset + size > pvr_obj_size ||
+	    pvr_obj_offset > pvr_obj_size) {
+		return -EINVAL;
+	}
+
+	op_ctx.new_va = kzalloc(sizeof(*op_ctx.new_va), GFP_KERNEL);
+	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
+	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
+	if (!op_ctx.new_va || !op_ctx.prev_va || !op_ctx.next_va) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
+	err = PTR_ERR_OR_ZERO(sgt);
+	if (err)
+		goto out_free;
+
+	op_ctx.mmu_op_ctx = pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt,
+						      pvr_obj_offset, size);
+	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
	if (err)
		goto out_free;
+
+	mutex_lock(&vm_ctx->lock);
+	err = drm_gpuva_sm_map(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size,
+			       gem_from_pvr_gem(pvr_obj), pvr_obj_offset);
+	mutex_unlock(&vm_ctx->lock);
+
+out_mmu_op_ctx_destroy:
+	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
+
+out_free:
+	kfree(op_ctx.next_va);
+	kfree(op_ctx.prev_va);
+	kfree(op_ctx.new_va);
+
+	return err;
+}
+
+/**
+ * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @device_addr: Virtual device address at the start of the target mapping.
+ * @size: Size of the target mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address,
+ *  * Any error encountered while performing internal operations required to
+ *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
+{
+	struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx };
+	int err;
+
+	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
+		return -EINVAL;
+
+	op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL);
+	op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL);
+	if (!op_ctx.prev_va || !op_ctx.next_va) {
+		err = -ENOMEM;
		goto err_free;
+	}
+
+	op_ctx.mmu_op_ctx =
+		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
+	err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx);
	if (err)
		goto err_free;
+
+	mutex_lock(&vm_ctx->lock);
+	err = drm_gpuva_sm_unmap(&vm_ctx->gpuva_mgr, &op_ctx, device_addr, size);
+	mutex_unlock(&vm_ctx->lock);
+
+	pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx);
+
+err_free:
+	kfree(op_ctx.next_va);
+	kfree(op_ctx.prev_va);
+
+	return err;
+}
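+
+/*
+ * A minimal sketch of how the mapping API above composes (illustrative only;
+ * error handling elided):
+ *
+ *   vm_ctx = pvr_vm_create_context(pvr_dev, is_userspace_context);
+ *   err = pvr_vm_map(vm_ctx, pvr_obj, 0, device_addr, size);
+ *   ...
+ *   err = pvr_vm_unmap(vm_ctx, device_addr, size);
+ *   pvr_vm_context_put(vm_ctx);
+ */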
+
+/*
+ * Static data areas are determined by firmware.
+ *
+ * When adding a new static data area you will also need to update the reserved_size field for the
+ * heap in pvr_heaps[].
+ */
+static const struct drm_pvr_static_data_area static_data_areas[] = {
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
+		.location_heap_id = DRM_PVR_HEAP_GENERAL,
+		.offset = 0,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
+		.location_heap_id = DRM_PVR_HEAP_GENERAL,
+		.offset = 128,
+		.size = 1024,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+		.offset = 0,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
+		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+		.offset = 128,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+		.location_heap_id = DRM_PVR_HEAP_USC_CODE,
+		.offset = 0,
+		.size = 128,
+	},
+};
+
+#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
+
+/*
+ * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
+ * static data area for each heap.
+ */
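+/*
+ * For example (assuming 4 KiB CPU pages), the last static data area in the
+ * general heap above ends at offset 128 + 1024 = 1152, so
+ * GET_RESERVED_SIZE(128, 1024) evaluates to round_up(1152, PAGE_SIZE) == 4096.
+ */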
+static const struct drm_pvr_heap pvr_heaps[] = {
+	[DRM_PVR_HEAP_GENERAL] = {
+		.base = ROGUE_GENERAL_HEAP_BASE,
+		.size = ROGUE_GENERAL_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_PDS_CODE_DATA] = {
+		.base = ROGUE_PDSCODEDATA_HEAP_BASE,
+		.size = ROGUE_PDSCODEDATA_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_USC_CODE] = {
+		.base = ROGUE_USCCODE_HEAP_BASE,
+		.size = ROGUE_USCCODE_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_RGNHDR] = {
+		.base = ROGUE_RGNHDR_HEAP_BASE,
+		.size = ROGUE_RGNHDR_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_VIS_TEST] = {
+		.base = ROGUE_VISTEST_HEAP_BASE,
+		.size = ROGUE_VISTEST_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_TRANSFER_FRAG] = {
+		.base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
+		.size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+};
+
+int
+pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+			  struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_static_data_areas query = {0};
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+	if (err < 0)
+		return err;
+
+	if (!query.static_data_areas.array) {
+		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+		query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
+		goto copy_out;
+	}
+
+	if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
+		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+
+	err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
+	if (err < 0)
+		return err;
+
+copy_out:
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
+int
+pvr_heap_info_get(const struct pvr_device *pvr_dev,
+		  struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_heap_info query = {0};
+	u64 dest;
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_heap_info);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+	if (err < 0)
+		return err;
+
+	if (!query.heaps.array) {
+		query.heaps.count = ARRAY_SIZE(pvr_heaps);
+		query.heaps.stride = sizeof(struct drm_pvr_heap);
+		goto copy_out;
+	}
+
+	if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
+		query.heaps.count = ARRAY_SIZE(pvr_heaps);
+
+	/* Region header heap is only present if BRN63142 is present. */
+	dest = query.heaps.array;
+	for (size_t i = 0; i < query.heaps.count; i++) {
+		struct drm_pvr_heap heap = pvr_heaps[i];
+
+		if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
+			heap.size = 0;
+
+		err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
+		if (err < 0)
+			return err;
+
+		dest += query.heaps.stride;
+	}
+
+copy_out:
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
+/**
+ * pvr_heap_contains_range() - Determine if a given heap contains the specified
+ *                             device-virtual address range.
+ * @pvr_heap: Target heap.
+ * @start: Inclusive start of the target range.
+ * @end: Inclusive end of the target range.
+ *
+ * It is an error to call this function with values of @start and @end that do
+ * not satisfy the condition @start <= @end.
+ */
+static __always_inline bool
+pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
+{
+	return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
+}
+
+/**
+ * pvr_find_heap_containing() - Find a heap which contains the specified
+ *                              device-virtual address range.
+ * @pvr_dev: Target PowerVR device.
+ * @start: Start of the target range.
+ * @size: Size of the target range.
+ *
+ * Return:
+ *  * A pointer to a constant instance of struct drm_pvr_heap representing the
+ *    heap containing the entire range specified by @start and @size on
+ *    success, or
+ *  * %NULL if no such heap exists.
+ */
+const struct drm_pvr_heap *
+pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
+{
+	u64 end;
+
+	if (check_add_overflow(start, size - 1, &end))
+		return NULL;
+
+	/*
+	 * There are no guarantees about the order of address ranges in
+	 * &pvr_heaps, so iterate over the entire array for a heap whose
+	 * range completely encompasses the given range.
+	 */
+	for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
+		/* Filter heaps that present only with an associated quirk */
+		if (heap_id == DRM_PVR_HEAP_RGNHDR &&
+		    !PVR_HAS_QUIRK(pvr_dev, 63142)) {
+			continue;
+		}
+
+		if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
+			return &pvr_heaps[heap_id];
+	}
+
+	return NULL;
+}
+
+/**
+ * pvr_vm_find_gem_object() - Look up a buffer object from a given
+ *                            device-virtual address.
+ * @vm_ctx: [IN] Target VM context.
+ * @device_addr: [IN] Virtual device address at the start of the required
+ *               object.
+ * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
+ *                     of the mapped region within the buffer object. May be
+ *                     %NULL if this information is not required.
+ * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
+ *                   region. May be %NULL if this information is not required.
+ *
+ * If successful, a reference will be taken on the buffer object. The caller
+ * must drop the reference with pvr_gem_object_put().
+ *
+ * Return:
+ *  * The PowerVR buffer object mapped at @device_addr if one exists, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
+		       u64 *mapped_offset_out, u64 *mapped_size_out)
+{
+	struct pvr_gem_object *pvr_obj;
+	struct drm_gpuva *va;
+
+	mutex_lock(&vm_ctx->lock);
+
+	va = drm_gpuva_find_first(&vm_ctx->gpuva_mgr, device_addr, 1);
+	if (!va)
+		goto err_unlock;
+
+	pvr_obj = gem_to_pvr_gem(va->gem.obj);
+	pvr_gem_object_get(pvr_obj);
+
+	if (mapped_offset_out)
+		*mapped_offset_out = va->gem.offset;
+	if (mapped_size_out)
+		*mapped_size_out = va->va.range;
+
+	mutex_unlock(&vm_ctx->lock);
+
+	return pvr_obj;
+
+err_unlock:
+	mutex_unlock(&vm_ctx->lock);
+
+	return NULL;
+}
+
+/**
+ * pvr_vm_get_fw_mem_context() - Get object representing firmware memory context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * FW object representing firmware memory context, or
+ *  * %NULL if this VM context does not have a firmware memory context.
+ */
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
+{
+	return vm_ctx->fw_mem_ctx_obj;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
new file mode 100644
index 000000000000..b98bc3981807
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm.h
@@ -0,0 +1,60 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_VM_H
+#define PVR_VM_H
+
+#include "pvr_rogue_mmu_defs.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+struct pvr_file;
+
+/* Forward declaration from "pvr_gem.h" */
+struct pvr_gem_object;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <uapi/drm/pvr_drm.h> */
+struct drm_pvr_ioctl_dev_query_args;
+
+/* Functions defined in pvr_vm.c */
+
+bool pvr_device_addr_is_valid(u64 device_addr);
+bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
+
+struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
+					     bool is_userspace_context);
+
+int pvr_vm_map(struct pvr_vm_context *vm_ctx,
+	       struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
+	       u64 device_addr, u64 size);
+int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
+
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
+
+int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+			      struct drm_pvr_ioctl_dev_query_args *args);
+int pvr_heap_info_get(const struct pvr_device *pvr_dev,
+		      struct drm_pvr_ioctl_dev_query_args *args);
+const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
+						    u64 addr, u64 size);
+
+struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
+					      u64 device_addr,
+					      u64 *mapped_offset_out,
+					      u64 *mapped_size_out);
+
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
+
+struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
+bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
+
+#endif /* PVR_VM_H */