Message ID | 20220111013955.3214767-1-daniel.phillips@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/1] Add available memory ioctl for libhsakmt | expand |
Am 2022-01-10 um 8:39 p.m. schrieb Daniel Phillips: > Add an ioctl to inquire memory available for allocation by libhsakmt > per node, allowing for space consumed by page translation tables. > > This ioctl is the underlying mechanism for the new memory availability > library call posted for review here: > > https://lists.freedesktop.org/archives/amd-gfx/2022-January/073352.html > > Signed-off-by: Daniel Phillips <daniel.phillips@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> > > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +++++++++++++++++ > include/uapi/linux/kfd_ioctl.h | 14 ++++++++++++-- > 4 files changed, 44 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index fcbc8a9c9e06..64c6c36685d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, > void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, > void *drm_priv); > uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); > int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > struct amdgpu_device *adev, uint64_t va, uint64_t size, > void *drm_priv, struct kgd_mem **mem, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 86a1a6c109d9..b7490a659173 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, > return ret; > } > > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) > +{ > + uint64_t 
reserved_for_pt = > + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); > + size_t available_memory; > + > + spin_lock(&kfd_mem_limit.mem_limit_lock); > + available_memory = > + adev->gmc.real_vram_size - > + adev->kfd.vram_used - reserved_for_pt; > + spin_unlock(&kfd_mem_limit.mem_limit_lock); > + return available_memory; > +} > + > static void unreserve_mem_limit(struct amdgpu_device *adev, > uint64_t size, u32 alloc_flag) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 4bfc0c8ab764..5c2f6d97ff1c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, > return r; > } > > +static int kfd_ioctl_get_available_memory(struct file *filep, > + struct kfd_process *p, void *data) > +{ > + struct kfd_ioctl_get_available_memory_args *args = data; > + struct kfd_dev *dev; > + > + dev = kfd_device_by_id(args->gpu_id); > + if (!dev) > + return -EINVAL; > + > + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); > + return 0; > +} > + > static int kfd_ioctl_set_memory_policy(struct file *filep, > struct kfd_process *p, void *data) > { > @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { > > AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, > kfd_ioctl_set_xnack_mode, 0), > + > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, > + kfd_ioctl_get_available_memory, 0), > }; > > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index af96af174dc4..94a99add2432 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -32,9 +32,10 @@ > * - 1.4 - Indicate new SRAM EDC bit in device properties > * - 1.5 - Add SVM API > * - 1.6 - Query clear flags in SVM get_attr API > + * - 1.7 - Add available_memory ioctl > */ > #define 
KFD_IOCTL_MAJOR_VERSION 1 > -#define KFD_IOCTL_MINOR_VERSION 6 > +#define KFD_IOCTL_MINOR_VERSION 7 > > struct kfd_ioctl_get_version_args { > __u32 major_version; /* from KFD */ > @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { > __u32 pad; > }; > > +struct kfd_ioctl_get_available_memory_args { > + __u64 available; /* from KFD */ > + __u32 gpu_id; /* to KFD */ > + __u32 pad; > +}; > + > /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ > #define KFD_IOC_CACHE_POLICY_COHERENT 0 > #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 > @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { > #define AMDKFD_IOC_SET_XNACK_MODE \ > AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) > > +#define AMDKFD_IOC_AVAILABLE_MEMORY \ > + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args) > + > #define AMDKFD_COMMAND_START 0x01 > -#define AMDKFD_COMMAND_END 0x22 > +#define AMDKFD_COMMAND_END 0x23 > > #endif
Am 2022-01-10 um 8:39 p.m. schrieb Daniel Phillips: > Add an ioctl to inquire memory available for allocation by libhsakmt > per node, allowing for space consumed by page translation tables. > > This ioctl is the underlying mechanism for the new memory availability > library call posted for review here: > > https://lists.freedesktop.org/archives/amd-gfx/2022-January/073352.html > > Signed-off-by: Daniel Phillips <daniel.phillips@amd.com> > > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +++++++++++++++++ > include/uapi/linux/kfd_ioctl.h | 14 ++++++++++++-- > 4 files changed, 44 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index fcbc8a9c9e06..64c6c36685d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, > void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, > void *drm_priv); > uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); > int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > struct amdgpu_device *adev, uint64_t va, uint64_t size, > void *drm_priv, struct kgd_mem **mem, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 86a1a6c109d9..b7490a659173 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, > return ret; > } > > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) > +{ > + uint64_t reserved_for_pt = > + 
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); > + size_t available_memory; > + > + spin_lock(&kfd_mem_limit.mem_limit_lock); > + available_memory = > + adev->gmc.real_vram_size - > + adev->kfd.vram_used - reserved_for_pt; > + spin_unlock(&kfd_mem_limit.mem_limit_lock); > + return available_memory; > +} > + > static void unreserve_mem_limit(struct amdgpu_device *adev, > uint64_t size, u32 alloc_flag) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 4bfc0c8ab764..5c2f6d97ff1c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, > return r; > } > > +static int kfd_ioctl_get_available_memory(struct file *filep, > + struct kfd_process *p, void *data) > +{ > + struct kfd_ioctl_get_available_memory_args *args = data; > + struct kfd_dev *dev; > + > + dev = kfd_device_by_id(args->gpu_id); > + if (!dev) > + return -EINVAL; > + > + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); > + return 0; > +} > + > static int kfd_ioctl_set_memory_policy(struct file *filep, > struct kfd_process *p, void *data) > { > @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { > > AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, > kfd_ioctl_set_xnack_mode, 0), > + > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, > + kfd_ioctl_get_available_memory, 0), > }; > > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index af96af174dc4..94a99add2432 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -32,9 +32,10 @@ > * - 1.4 - Indicate new SRAM EDC bit in device properties > * - 1.5 - Add SVM API > * - 1.6 - Query clear flags in SVM get_attr API > + * - 1.7 - Add available_memory ioctl > */ > #define KFD_IOCTL_MAJOR_VERSION 1 > -#define 
KFD_IOCTL_MINOR_VERSION 6 > +#define KFD_IOCTL_MINOR_VERSION 7 > > struct kfd_ioctl_get_version_args { > __u32 major_version; /* from KFD */ > @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { > __u32 pad; > }; > > +struct kfd_ioctl_get_available_memory_args { > + __u64 available; /* from KFD */ > + __u32 gpu_id; /* to KFD */ > + __u32 pad; > +}; > + > /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ > #define KFD_IOC_CACHE_POLICY_COHERENT 0 > #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 > @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { > #define AMDKFD_IOC_SET_XNACK_MODE \ > AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) > > +#define AMDKFD_IOC_AVAILABLE_MEMORY \ > + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args) > + This needs to be AMDKFD_IOWR. Otherwise the gpu_id doesn't get copied from user mode by kfd_ioctl. I also updated the test (see the V2 I just sent out) and got a little closer to having a working test. However, the test still fails on my Fiji. The available memory reported on that card is about 4094 MB. The card has 4GB, but 6MB of that are already used just in console mode. So the memory allocation in the test fails. I think we need to refine the memory limit to something more realistic, if the goal is to report reliably the largest possible memory allocation that will succeed. If we have to fudge the available memory number to something smaller than the limit, then we also have to abandon the negative test that confirms that bigger allocations will fail. Regards, Felix > #define AMDKFD_COMMAND_START 0x01 > -#define AMDKFD_COMMAND_END 0x22 > +#define AMDKFD_COMMAND_END 0x23 > > #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..64c6c36685d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 86a1a6c109d9..b7490a659173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available_memory; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available_memory = + adev->gmc.real_vram_size - + adev->kfd.vram_used - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return available_memory; +} + static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764..5c2f6d97ff1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, return r; } +static int kfd_ioctl_get_available_memory(struct file *filep, + struct kfd_process *p, void *data) +{ + struct 
kfd_ioctl_get_available_memory_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); + return 0; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..94a99add2432 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Add available_memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_AVAILABLE_MEMORY \ + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif
Add an ioctl to query the memory available for allocation by libhsakmt per node, allowing for space consumed by page translation tables. This ioctl is the underlying mechanism for the new memory availability library call posted for review here: https://lists.freedesktop.org/archives/amd-gfx/2022-January/073352.html Signed-off-by: Daniel Phillips <daniel.phillips@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 14 ++++++++++++-- 4 files changed, 44 insertions(+), 2 deletions(-)