diff mbox series

[v5,13/17] platform/x86/amd/pmf: Add PMF-AMDGPU get interface

Message ID 20231117080747.3643990-14-Shyam-sundar.S-k@amd.com (mailing list archive)
State Superseded
Headers show
Series Introduce PMF Smart PC Solution Builder Feature | expand

Commit Message

Shyam Sundar S K Nov. 17, 2023, 8:07 a.m. UTC
In order to provide GPU inputs to TA for the Smart PC solution to work, we
need to have an interface between the PMF driver and the AMDGPU driver.

Add the initial code path for get interface from AMDGPU.

Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
---
 drivers/platform/x86/amd/pmf/core.c   |  3 +-
 drivers/platform/x86/amd/pmf/pmf.h    | 18 +++++++
 drivers/platform/x86/amd/pmf/spc.c    | 41 ++++++++++++++
 drivers/platform/x86/amd/pmf/tee-if.c | 77 +++++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 1 deletion(-)

Comments

Hans de Goede Nov. 17, 2023, 10:48 a.m. UTC | #1
Hi Shyam,

On 11/17/23 09:07, Shyam Sundar S K wrote:
> In order to provide GPU inputs to TA for the Smart PC solution to work, we
> need to have interface between the PMF driver and the AMDGPU driver.
> 
> Add the initial code path for get interface from AMDGPU.
> 
> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> ---
>  drivers/platform/x86/amd/pmf/core.c   |  3 +-
>  drivers/platform/x86/amd/pmf/pmf.h    | 18 +++++++
>  drivers/platform/x86/amd/pmf/spc.c    | 41 ++++++++++++++
>  drivers/platform/x86/amd/pmf/tee-if.c | 77 +++++++++++++++++++++++++++
>  4 files changed, 138 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
> index 4b8156033fa6..9b14a997cd48 100644
> --- a/drivers/platform/x86/amd/pmf/core.c
> +++ b/drivers/platform/x86/amd/pmf/core.c
> @@ -411,6 +411,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
>  	}
>  
>  	dev->cpu_id = rdev->device;
> +	dev->root = rdev;
>  
>  	err = amd_smn_read(0, AMD_PMF_BASE_ADDR_LO, &val);
>  	if (err) {
> @@ -482,4 +483,4 @@ module_platform_driver(amd_pmf_driver);
>  
>  MODULE_LICENSE("GPL");
>  MODULE_DESCRIPTION("AMD Platform Management Framework Driver");
> -MODULE_SOFTDEP("pre: amdtee");
> +MODULE_SOFTDEP("pre: amdtee amdgpu");
> diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
> index 8712299ad52b..525308519fa3 100644
> --- a/drivers/platform/x86/amd/pmf/pmf.h
> +++ b/drivers/platform/x86/amd/pmf/pmf.h
> @@ -11,7 +11,11 @@
>  #ifndef PMF_H
>  #define PMF_H
>  
> +#include <acpi/video.h>
> +#include <drm/drm_connector.h>
>  #include <linux/acpi.h>
> +#include <linux/backlight.h>
> +#include <linux/pci.h>
>  #include <linux/platform_profile.h>
>  
>  #define POLICY_BUF_MAX_SZ		0x4b000
> @@ -83,6 +87,8 @@
>  #define TA_OUTPUT_RESERVED_MEM				906
>  #define MAX_OPERATION_PARAMS					4
>  
> +#define MAX_SUPPORTED_DISPLAY		4
> +
>  /* AMD PMF BIOS interfaces */
>  struct apmf_verify_interface {
>  	u16 size;
> @@ -194,6 +200,15 @@ enum power_modes {
>  	POWER_MODE_MAX,
>  };
>  
> +struct amd_gpu_pmf_data {
> +	struct pci_dev *gpu_dev;
> +	struct backlight_device *raw_bd;
> +	struct thermal_cooling_device *cooling_dev;
> +	enum drm_connector_status con_status[MAX_SUPPORTED_DISPLAY];
> +	int display_count;
> +	int connector_type[MAX_SUPPORTED_DISPLAY];
> +};
> +
>  struct amd_pmf_dev {
>  	void __iomem *regbase;
>  	void __iomem *smu_virt_addr;
> @@ -228,9 +243,12 @@ struct amd_pmf_dev {
>  	void *shbuf;
>  	struct delayed_work pb_work;
>  	struct pmf_action_table *prev_data;
> +	struct amd_gpu_pmf_data gfx_data;
>  	u64 policy_addr;
>  	void *policy_base;
>  	bool smart_pc_enabled;
> +	struct pci_dev *root;
> +	struct drm_device *drm_dev;
>  };
>  
>  struct apmf_sps_prop_granular {
> diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
> index 959146fd483f..47ec563088b8 100644
> --- a/drivers/platform/x86/amd/pmf/spc.c
> +++ b/drivers/platform/x86/amd/pmf/spc.c
> @@ -44,6 +44,10 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *
>  	dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency);
>  	dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
>  	dev_dbg(dev->dev, "Connected Display Count: %u\n", in->ev_info.monitor_count);
> +	dev_dbg(dev->dev, "Primary Display Type: %s\n",
> +		drm_get_connector_type_name(in->ev_info.display_type));
> +	dev_dbg(dev->dev, "Primary Display State: %s\n", in->ev_info.display_state ?
> +			"Connected" : "disconnected/unknown");
>  	dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open");
>  	dev_dbg(dev->dev, "==== TA inputs END ====\n");
>  }
> @@ -146,6 +150,41 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
>  	return 0;
>  }
>  
> +static int amd_pmf_get_gfx_data(struct amd_pmf_dev *pmf)
> +{
> +	struct drm_connector_list_iter iter;
> +	struct drm_connector *connector;
> +	int i = 0;
> +
> +	/* Reset the count to zero */
> +	pmf->gfx_data.display_count = 0;
> +
> +	drm_connector_list_iter_begin(pmf->drm_dev, &iter);
> +	drm_for_each_connector_iter(connector, &iter) {
> +		if (connector->status == connector_status_connected)
> +			pmf->gfx_data.display_count++;
> +		if (connector->status != pmf->gfx_data.con_status[i])
> +			pmf->gfx_data.con_status[i] = connector->status;
> +		if (connector->connector_type != pmf->gfx_data.connector_type[i])
> +			pmf->gfx_data.connector_type[i] = connector->connector_type;
> +
> +		i++;
> +		if (i >= MAX_SUPPORTED_DISPLAY)
> +			break;
> +	}
> +	drm_connector_list_iter_end(&iter);
> +
> +	return 0;
> +}
> +
> +static void amd_pmf_get_gpu_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
> +{
> +	amd_pmf_get_gfx_data(dev);
> +	in->ev_info.monitor_count = dev->gfx_data.display_count;
> +	in->ev_info.display_type = dev->gfx_data.connector_type[0];
> +	in->ev_info.display_state = dev->gfx_data.con_status[0];
> +}
> +
>  void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
>  {
>  	/* TA side lid open is 1 and close is 0, hence the ! here */
> @@ -154,4 +193,6 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
>  	amd_pmf_get_smu_info(dev, in);
>  	amd_pmf_get_battery_info(dev, in);
>  	amd_pmf_get_slider_info(dev, in);
> +	if (dev->drm_dev)
> +		amd_pmf_get_gpu_info(dev, in);
>  }
> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
> index 81b1bd356e83..82ee2b1c627f 100644
> --- a/drivers/platform/x86/amd/pmf/tee-if.c
> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
> @@ -10,6 +10,7 @@
>  
>  #include <linux/debugfs.h>
>  #include <linux/tee_drv.h>
> +#include <linux/thermal.h>
>  #include <linux/uuid.h>
>  #include "pmf.h"
>  
> @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
>  	tee_client_close_context(dev->tee_ctx);
>  }
>  
> +static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
> +				     unsigned long *state)
> +{
> +	struct backlight_device *bd;
> +
> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +		return -ENODEV;
> +
> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +	if (!bd)
> +		return -ENODEV;
> +
> +	*state = backlight_get_brightness(bd);
> +
> +	return 0;
> +}
> +
> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
> +				     unsigned long *state)
> +{
> +	struct backlight_device *bd;
> +
> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +		return -ENODEV;
> +
> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +	if (!bd)
> +		return -ENODEV;
> +
> +	if (backlight_is_blank(bd))
> +		*state = 0;
> +	else
> +		*state = bd->props.max_brightness;
> +
> +	return 0;
> +}
> +
> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
> +	.get_max_state = amd_pmf_gpu_get_max_state,
> +	.get_cur_state = amd_pmf_gpu_get_cur_state,
> +};

This is still the wrong thing to do. The new PMF code MUST only
be a *consumer* of the thermal_cooling_device API.

The thermal-cooling device for the backlight itself MUST be
registered by the amdgpu driver.

I believe that the correct fix for this is to add a new flag to
the backlight_properties struct;
And then make backlight_device_register() register
a thermal_cooling_device for the backlight when this new flag is set.

This way we get a nice generic way to use backlight class devices
as thermal cooling devices and you can make the amdgpu driver
register the thermal cooling device by simply adding the new flag
to the backlight_properties struct used in the amdgpu driver.

> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
> +{
> +	struct amd_pmf_dev *dev = data;
> +
> +	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
> +		dev->gfx_data.gpu_dev = pdev;
> +		return 1; /* Stop walking */
> +	}
> +
> +	return 0; /* Continue walking */
> +}
> +
>  int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>  {
>  	int ret;
> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>  	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>  	amd_pmf_set_dram_addr(dev);
>  	amd_pmf_get_bios_buffer(dev);
> +
>  	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>  	if (!dev->prev_data)
>  		return -ENOMEM;
>  
> +	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
> +	if (dev->gfx_data.gpu_dev) {
> +		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
> +		if (!dev->drm_dev)
> +			return -EINVAL;

You cannot just call pci_get_drvdata() on a device for which you
are not the driver. You have no idea of the lifetime of this device,
the driver may unbind and release the memory pci_get_drvdata()
points to, leaving this code with a dangling pointer which will
crash the kernel the first time you try to use it.

Also since you are not the owner you MUST not assume any specific
type for this memory, you cannot be sure this actually is of
the type drm_device. Basically you MUST not touch another
driver's drvdata *at all*.

The proper way to fix this would be to add some API to the DRM
subsystem to query the information you are looking for from
the DRM subsystem.

Poking directly inside other driver's internals is NOT acceptable,
NACK for this patch.

Regards,

Hans


> +
> +		if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +			return -ENODEV;
> +
> +		dev->gfx_data.raw_bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +		if (!dev->gfx_data.raw_bd)
> +			return -ENODEV;
> +
> +		dev->gfx_data.cooling_dev = thermal_cooling_device_register("pmf_gpu_bd",
> +									    NULL, &bd_cooling_ops);
> +		if (IS_ERR(dev->gfx_data.cooling_dev))
> +			return -ENODEV;
> +	}
> +
>  	return dev->smart_pc_enabled;
>  }
>  
> @@ -448,5 +523,7 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
>  	kfree(dev->prev_data);
>  	kfree(dev->policy_buf);
>  	cancel_delayed_work_sync(&dev->pb_work);
> +	if (dev->gfx_data.cooling_dev)
> +		thermal_cooling_device_unregister(dev->gfx_data.cooling_dev);
>  	amd_pmf_tee_deinit(dev);
>  }
Shyam Sundar S K Nov. 17, 2023, 11:08 a.m. UTC | #2
Adding AMDGPU folks (Alex and Christian) - I had to drop them as the
changes from amdgpu were moved to PMF driver.

Hi Hans,


On 11/17/2023 4:18 PM, Hans de Goede wrote:
> Hi Shyam,
> 
> On 11/17/23 09:07, Shyam Sundar S K wrote:
>> In order to provide GPU inputs to TA for the Smart PC solution to work, we
>> need to have interface between the PMF driver and the AMDGPU driver.
>>
>> Add the initial code path for get interface from AMDGPU.
>>
>> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
>> ---
>>  drivers/platform/x86/amd/pmf/core.c   |  3 +-
>>  drivers/platform/x86/amd/pmf/pmf.h    | 18 +++++++
>>  drivers/platform/x86/amd/pmf/spc.c    | 41 ++++++++++++++
>>  drivers/platform/x86/amd/pmf/tee-if.c | 77 +++++++++++++++++++++++++++
>>  4 files changed, 138 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
>> index 4b8156033fa6..9b14a997cd48 100644
>> --- a/drivers/platform/x86/amd/pmf/core.c
>> +++ b/drivers/platform/x86/amd/pmf/core.c
>> @@ -411,6 +411,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
>>  	}
>>  
>>  	dev->cpu_id = rdev->device;
>> +	dev->root = rdev;
>>  
>>  	err = amd_smn_read(0, AMD_PMF_BASE_ADDR_LO, &val);
>>  	if (err) {
>> @@ -482,4 +483,4 @@ module_platform_driver(amd_pmf_driver);
>>  
>>  MODULE_LICENSE("GPL");
>>  MODULE_DESCRIPTION("AMD Platform Management Framework Driver");
>> -MODULE_SOFTDEP("pre: amdtee");
>> +MODULE_SOFTDEP("pre: amdtee amdgpu");
>> diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
>> index 8712299ad52b..525308519fa3 100644
>> --- a/drivers/platform/x86/amd/pmf/pmf.h
>> +++ b/drivers/platform/x86/amd/pmf/pmf.h
>> @@ -11,7 +11,11 @@
>>  #ifndef PMF_H
>>  #define PMF_H
>>  
>> +#include <acpi/video.h>
>> +#include <drm/drm_connector.h>
>>  #include <linux/acpi.h>
>> +#include <linux/backlight.h>
>> +#include <linux/pci.h>
>>  #include <linux/platform_profile.h>
>>  
>>  #define POLICY_BUF_MAX_SZ		0x4b000
>> @@ -83,6 +87,8 @@
>>  #define TA_OUTPUT_RESERVED_MEM				906
>>  #define MAX_OPERATION_PARAMS					4
>>  
>> +#define MAX_SUPPORTED_DISPLAY		4
>> +
>>  /* AMD PMF BIOS interfaces */
>>  struct apmf_verify_interface {
>>  	u16 size;
>> @@ -194,6 +200,15 @@ enum power_modes {
>>  	POWER_MODE_MAX,
>>  };
>>  
>> +struct amd_gpu_pmf_data {
>> +	struct pci_dev *gpu_dev;
>> +	struct backlight_device *raw_bd;
>> +	struct thermal_cooling_device *cooling_dev;
>> +	enum drm_connector_status con_status[MAX_SUPPORTED_DISPLAY];
>> +	int display_count;
>> +	int connector_type[MAX_SUPPORTED_DISPLAY];
>> +};
>> +
>>  struct amd_pmf_dev {
>>  	void __iomem *regbase;
>>  	void __iomem *smu_virt_addr;
>> @@ -228,9 +243,12 @@ struct amd_pmf_dev {
>>  	void *shbuf;
>>  	struct delayed_work pb_work;
>>  	struct pmf_action_table *prev_data;
>> +	struct amd_gpu_pmf_data gfx_data;
>>  	u64 policy_addr;
>>  	void *policy_base;
>>  	bool smart_pc_enabled;
>> +	struct pci_dev *root;
>> +	struct drm_device *drm_dev;
>>  };
>>  
>>  struct apmf_sps_prop_granular {
>> diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
>> index 959146fd483f..47ec563088b8 100644
>> --- a/drivers/platform/x86/amd/pmf/spc.c
>> +++ b/drivers/platform/x86/amd/pmf/spc.c
>> @@ -44,6 +44,10 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *
>>  	dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency);
>>  	dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
>>  	dev_dbg(dev->dev, "Connected Display Count: %u\n", in->ev_info.monitor_count);
>> +	dev_dbg(dev->dev, "Primary Display Type: %s\n",
>> +		drm_get_connector_type_name(in->ev_info.display_type));
>> +	dev_dbg(dev->dev, "Primary Display State: %s\n", in->ev_info.display_state ?
>> +			"Connected" : "disconnected/unknown");
>>  	dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open");
>>  	dev_dbg(dev->dev, "==== TA inputs END ====\n");
>>  }
>> @@ -146,6 +150,41 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
>>  	return 0;
>>  }
>>  
>> +static int amd_pmf_get_gfx_data(struct amd_pmf_dev *pmf)
>> +{
>> +	struct drm_connector_list_iter iter;
>> +	struct drm_connector *connector;
>> +	int i = 0;
>> +
>> +	/* Reset the count to zero */
>> +	pmf->gfx_data.display_count = 0;
>> +
>> +	drm_connector_list_iter_begin(pmf->drm_dev, &iter);
>> +	drm_for_each_connector_iter(connector, &iter) {
>> +		if (connector->status == connector_status_connected)
>> +			pmf->gfx_data.display_count++;
>> +		if (connector->status != pmf->gfx_data.con_status[i])
>> +			pmf->gfx_data.con_status[i] = connector->status;
>> +		if (connector->connector_type != pmf->gfx_data.connector_type[i])
>> +			pmf->gfx_data.connector_type[i] = connector->connector_type;
>> +
>> +		i++;
>> +		if (i >= MAX_SUPPORTED_DISPLAY)
>> +			break;
>> +	}
>> +	drm_connector_list_iter_end(&iter);
>> +
>> +	return 0;
>> +}
>> +
>> +static void amd_pmf_get_gpu_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
>> +{
>> +	amd_pmf_get_gfx_data(dev);
>> +	in->ev_info.monitor_count = dev->gfx_data.display_count;
>> +	in->ev_info.display_type = dev->gfx_data.connector_type[0];
>> +	in->ev_info.display_state = dev->gfx_data.con_status[0];
>> +}
>> +
>>  void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
>>  {
>>  	/* TA side lid open is 1 and close is 0, hence the ! here */
>> @@ -154,4 +193,6 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
>>  	amd_pmf_get_smu_info(dev, in);
>>  	amd_pmf_get_battery_info(dev, in);
>>  	amd_pmf_get_slider_info(dev, in);
>> +	if (dev->drm_dev)
>> +		amd_pmf_get_gpu_info(dev, in);
>>  }
>> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
>> index 81b1bd356e83..82ee2b1c627f 100644
>> --- a/drivers/platform/x86/amd/pmf/tee-if.c
>> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
>> @@ -10,6 +10,7 @@
>>  
>>  #include <linux/debugfs.h>
>>  #include <linux/tee_drv.h>
>> +#include <linux/thermal.h>
>>  #include <linux/uuid.h>
>>  #include "pmf.h"
>>  
>> @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
>>  	tee_client_close_context(dev->tee_ctx);
>>  }
>>  
>> +static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
>> +				     unsigned long *state)
>> +{
>> +	struct backlight_device *bd;
>> +
>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +		return -ENODEV;
>> +
>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>> +	if (!bd)
>> +		return -ENODEV;
>> +
>> +	*state = backlight_get_brightness(bd);
>> +
>> +	return 0;
>> +}
>> +
>> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
>> +				     unsigned long *state)
>> +{
>> +	struct backlight_device *bd;
>> +
>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +		return -ENODEV;
>> +
>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>> +	if (!bd)
>> +		return -ENODEV;
>> +
>> +	if (backlight_is_blank(bd))
>> +		*state = 0;
>> +	else
>> +		*state = bd->props.max_brightness;
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
>> +	.get_max_state = amd_pmf_gpu_get_max_state,
>> +	.get_cur_state = amd_pmf_gpu_get_cur_state,
>> +};
> 
> This is still the wrong thing to do. The new PMF code MUST only
> be a *consumer* of the thermal_cooling_device API.
> 
> The thermal-cooling device for the backlight itself MUST be
> registered by the amdgpu driver.
> 
> I believe that the correct fix for this is to add a new flag to
> the backlight_properties struct;
> And then make backlight_device_register() register
> a thermal_cooling_device for the backlight when this new flag is set.
> 
> This way we get a nice generic way to use backlight class devices
> as thermal cooling devices and you can make the amdgpu driver
> register the thermal cooling device by simply adding the new flag
> to the backlight_properties struct used in the amdgpu driver.

Agreed. I am also of the same opinion.

> 
>> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
>> +{
>> +	struct amd_pmf_dev *dev = data;
>> +
>> +	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
>> +		dev->gfx_data.gpu_dev = pdev;
>> +		return 1; /* Stop walking */
>> +	}
>> +
>> +	return 0; /* Continue walking */
>> +}
>> +
>>  int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>  {
>>  	int ret;
>> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>  	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>>  	amd_pmf_set_dram_addr(dev);
>>  	amd_pmf_get_bios_buffer(dev);
>> +
>>  	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>>  	if (!dev->prev_data)
>>  		return -ENOMEM;
>>  
>> +	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
>> +	if (dev->gfx_data.gpu_dev) {
>> +		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
>> +		if (!dev->drm_dev)
>> +			return -EINVAL;
> 
> You cannot just call pci_get_drvdata() on a device for which you
> are not the driver. You have no idea of the lifetime of this device,
> the driver may unbind and release the memory pci_get_drvdata()
> points to, leaving this code with a dangling pointer which will
> crash the kernel the first time you try to use it.
> 
> Also since you are not the owner you MUST not assume any specific
> type for this memory, you cannot be sure this actually is of
> the type drm_device. Basically you MUST not touch another
> driver's drvdata *at all*.
> 
> The proper way to fix this would be to add some API to the DRM
> subsystem to query the information you are looking for form
> the DRM subsystem.
> 
> Poking directly inside other driver's internals is NOT acceptable,
> NACK for this patch.
> 

I am in line with your remarks, but I found handling the thermal
driver registration and the backlight without having changes in the
amdgpu driver very tricky.

Like you said, I am also of the same opinion that PMF driver should
call the DRM/GPU subsystem APIs (like it does with other subsystems).

But Christian had concerns on adding all of these into the GPU driver.
So I had to roll back these into the PMF driver, and hence you see a
pci_get_drvdata() call.

I can add the thermal device registration into the amdgpu driver and
then call the DRM APIs from the PMF driver.

Christian, do you have any feedback here please?

For the sake of simplicity, I can drop patches 13/17 and 14/17 from
the series and send them separately later.

Thanks,
Shyam

> Regards,
> 
> Hans
> 
> 
>> +
>> +		if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +			return -ENODEV;
>> +
>> +		dev->gfx_data.raw_bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>> +		if (!dev->gfx_data.raw_bd)
>> +			return -ENODEV;
>> +
>> +		dev->gfx_data.cooling_dev = thermal_cooling_device_register("pmf_gpu_bd",
>> +									    NULL, &bd_cooling_ops);
>> +		if (IS_ERR(dev->gfx_data.cooling_dev))
>> +			return -ENODEV;
>> +	}
>> +
>>  	return dev->smart_pc_enabled;
>>  }
>>  
>> @@ -448,5 +523,7 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
>>  	kfree(dev->prev_data);
>>  	kfree(dev->policy_buf);
>>  	cancel_delayed_work_sync(&dev->pb_work);
>> +	if (dev->gfx_data.cooling_dev)
>> +		thermal_cooling_device_unregister(dev->gfx_data.cooling_dev);
>>  	amd_pmf_tee_deinit(dev);
>>  }
> 
>
Hans de Goede Nov. 17, 2023, 11:31 a.m. UTC | #3
Hi,

On 11/17/23 12:08, Shyam Sundar S K wrote:
> Adding AMDGPU folks (Alex and Christian) - I had to drop them as the
> changes from amdgpu were moved to PMF driver.
> 
> Hi Hans,
> 
> 
> On 11/17/2023 4:18 PM, Hans de Goede wrote:
>> Hi Shyam,
>>
>> On 11/17/23 09:07, Shyam Sundar S K wrote:
>>> In order to provide GPU inputs to TA for the Smart PC solution to work, we
>>> need to have interface between the PMF driver and the AMDGPU driver.
>>>
>>> Add the initial code path for get interface from AMDGPU.
>>>
>>> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

<snip>

>>> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
>>> index 81b1bd356e83..82ee2b1c627f 100644
>>> --- a/drivers/platform/x86/amd/pmf/tee-if.c
>>> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
>>> @@ -10,6 +10,7 @@
>>>  
>>>  #include <linux/debugfs.h>
>>>  #include <linux/tee_drv.h>
>>> +#include <linux/thermal.h>
>>>  #include <linux/uuid.h>
>>>  #include "pmf.h"
>>>  
>>> @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
>>>  	tee_client_close_context(dev->tee_ctx);
>>>  }
>>>  
>>> +static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
>>> +				     unsigned long *state)
>>> +{
>>> +	struct backlight_device *bd;
>>> +
>>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>>> +		return -ENODEV;
>>> +
>>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>>> +	if (!bd)
>>> +		return -ENODEV;
>>> +
>>> +	*state = backlight_get_brightness(bd);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
>>> +				     unsigned long *state)
>>> +{
>>> +	struct backlight_device *bd;
>>> +
>>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>>> +		return -ENODEV;
>>> +
>>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>>> +	if (!bd)
>>> +		return -ENODEV;
>>> +
>>> +	if (backlight_is_blank(bd))
>>> +		*state = 0;
>>> +	else
>>> +		*state = bd->props.max_brightness;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
>>> +	.get_max_state = amd_pmf_gpu_get_max_state,
>>> +	.get_cur_state = amd_pmf_gpu_get_cur_state,
>>> +};
>>
>> This is still the wrong thing to do. The new PMF code MUST only
>> be a *consumer* of the thermal_cooling_device API.
>>
>> The thermal-cooling device for the backlight itself MUST be
>> registered by the amdgpu driver.
>>
>> I believe that the correct fix for this is to add a new flag to
>> the backlight_properties struct;
>> And then make backlight_device_register() register
>> a thermal_cooling_device for the backlight when this new flag is set.
>>
>> This way we get a nice generic way to use backlight class devices
>> as thermal cooling devices and you can make the amdgpu driver
>> register the thermal cooling device by simply adding the new flag
>> to the backlight_properties struct used in the amdgpu driver.
> 
> Agreed. I am also of the same opinion.

So the change to the AMDGPU driver here would just be setting
this one new flag in the backlight_properties struct.

Alex, Christian, would that be acceptable to you ?


>>> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
>>> +{
>>> +	struct amd_pmf_dev *dev = data;
>>> +
>>> +	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
>>> +		dev->gfx_data.gpu_dev = pdev;
>>> +		return 1; /* Stop walking */
>>> +	}
>>> +
>>> +	return 0; /* Continue walking */
>>> +}
>>> +
>>>  int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>>  {
>>>  	int ret;
>>> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>>  	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>>>  	amd_pmf_set_dram_addr(dev);
>>>  	amd_pmf_get_bios_buffer(dev);
>>> +
>>>  	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>>>  	if (!dev->prev_data)
>>>  		return -ENOMEM;
>>>  
>>> +	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
>>> +	if (dev->gfx_data.gpu_dev) {
>>> +		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
>>> +		if (!dev->drm_dev)
>>> +			return -EINVAL;
>>
>> You cannot just call pci_get_drvdata() on a device for which you
>> are not the driver. You have no idea of the lifetime of this device,
>> the driver may unbind and release the memory pci_get_drvdata()
>> points to, leaving this code with a dangling pointer which will
>> crash the kernel the first time you try to use it.
>>
>> Also since you are not the owner you MUST not assume any specific
>> type for this memory, you cannot be sure this actually is of
>> the type drm_device. Basically you MUST not touch another
>> driver's drvdata *at all*.
>>
>> The proper way to fix this would be to add some API to the DRM
>> subsystem to query the information you are looking for form
>> the DRM subsystem.
>>
>> Poking directly inside other driver's internals is NOT acceptable,
>> NACK for this patch.
>>
> 
> I am inline with your remarks, but I could find a way where the
> thermal driver registration, handling the backlight without having the
> changes in the amdgpu driver very tricky.

As mentioned above I think there should be generic thermal cooling
device support added to drivers/video/backlight/backlight.c, then
the amdgpu code just needs to pass a flag when registering
the backlight to enable this.

> Like you said, I am also of the same opinion that PMF driver should
> call the DRM/GPU subsystem APIs (like it does with other subsystems).
> 
> But Christian had concerns on adding all of these into the GPU driver.
> So I had to roll back these into the PMF driver, and hence you see a
> pci_get_drvdata() call.

Ok, so I can see how this AMD PMF code is all kinda special
and how the DRM folks don't want to have to add APIs just for
that. But IMHO calling pci_get_drvdata() on an unowned
device is completely unacceptable.

At a minimum we need life-cycle management for the drm_device
which the PMF code is using, something like:

struct drm_device *drm_device_find(const void *data,
   int (*match)(struct drm_device *dev, const void *data));

which works similar to class_find_device() and returns
a reference to the drm_device for which match has returned 0
(which also stops it from looping over devices).

Combined with a:

void drm_device_put(struct drm_device *dev);

for when the PMF code is done with the device.

This way the PMF code can safely get a reference to drm_device
and release it when it is done. Rather than just getting
some random pointer which may or may not actually be a drm_device
and the lifetime of which is not guaranteed in any way.

E.g. if the PMF driver loads before amdgpu then
pci_get_drvdata() will just return NULL.

And as mentioned if the amdgpu driver gets unbound after
the PMF code has called pci_get_drvdata() the PMF driver
now has a dangling pointer.

So IMHO adding: drm_device_find() + drm_device_put()
to the DRM core are the minimum which is necessary here.

If the PMF code then ends up doing things like
drm_for_each_connector_iter() on the gotten drm_device
reference so be it. But we must make sure we have
a properly lifecycle managed reference first and
pci_get_drvdata() does not give us that.

> For the sake of simplicity, I can drop patches 13/17 and 14/17 from
> the series and send them separately later.

Yes I think that dropping the GPU related patches for
now would be best.

Regards,

Hans
Mario Limonciello Nov. 17, 2023, 6:13 p.m. UTC | #4
On 11/17/2023 02:07, Shyam Sundar S K wrote:
> In order to provide GPU inputs to TA for the Smart PC solution to work, we
> need to have interface between the PMF driver and the AMDGPU driver.
> 
> Add the initial code path for get interface from AMDGPU.

Make sure you update the commit message when you resubmit v6.

> 
> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> ---
>   drivers/platform/x86/amd/pmf/core.c   |  3 +-
>   drivers/platform/x86/amd/pmf/pmf.h    | 18 +++++++
>   drivers/platform/x86/amd/pmf/spc.c    | 41 ++++++++++++++
>   drivers/platform/x86/amd/pmf/tee-if.c | 77 +++++++++++++++++++++++++++
>   4 files changed, 138 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
> index 4b8156033fa6..9b14a997cd48 100644
> --- a/drivers/platform/x86/amd/pmf/core.c
> +++ b/drivers/platform/x86/amd/pmf/core.c
> @@ -411,6 +411,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
>   	}
>   
>   	dev->cpu_id = rdev->device;
> +	dev->root = rdev;
>   
>   	err = amd_smn_read(0, AMD_PMF_BASE_ADDR_LO, &val);
>   	if (err) {
> @@ -482,4 +483,4 @@ module_platform_driver(amd_pmf_driver);
>   
>   MODULE_LICENSE("GPL");
>   MODULE_DESCRIPTION("AMD Platform Management Framework Driver");
> -MODULE_SOFTDEP("pre: amdtee");
> +MODULE_SOFTDEP("pre: amdtee amdgpu");
> diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
> index 8712299ad52b..525308519fa3 100644
> --- a/drivers/platform/x86/amd/pmf/pmf.h
> +++ b/drivers/platform/x86/amd/pmf/pmf.h
> @@ -11,7 +11,11 @@
>   #ifndef PMF_H
>   #define PMF_H
>   
> +#include <acpi/video.h>
> +#include <drm/drm_connector.h>
>   #include <linux/acpi.h>
> +#include <linux/backlight.h>
> +#include <linux/pci.h>
>   #include <linux/platform_profile.h>
>   
>   #define POLICY_BUF_MAX_SZ		0x4b000
> @@ -83,6 +87,8 @@
>   #define TA_OUTPUT_RESERVED_MEM				906
>   #define MAX_OPERATION_PARAMS					4
>   
> +#define MAX_SUPPORTED_DISPLAY		4
> +
>   /* AMD PMF BIOS interfaces */
>   struct apmf_verify_interface {
>   	u16 size;
> @@ -194,6 +200,15 @@ enum power_modes {
>   	POWER_MODE_MAX,
>   };
>   
> +struct amd_gpu_pmf_data {
> +	struct pci_dev *gpu_dev;
> +	struct backlight_device *raw_bd;
> +	struct thermal_cooling_device *cooling_dev;
> +	enum drm_connector_status con_status[MAX_SUPPORTED_DISPLAY];
> +	int display_count;
> +	int connector_type[MAX_SUPPORTED_DISPLAY];
> +};
> +
>   struct amd_pmf_dev {
>   	void __iomem *regbase;
>   	void __iomem *smu_virt_addr;
> @@ -228,9 +243,12 @@ struct amd_pmf_dev {
>   	void *shbuf;
>   	struct delayed_work pb_work;
>   	struct pmf_action_table *prev_data;
> +	struct amd_gpu_pmf_data gfx_data;
>   	u64 policy_addr;
>   	void *policy_base;
>   	bool smart_pc_enabled;
> +	struct pci_dev *root;
> +	struct drm_device *drm_dev;
>   };
>   
>   struct apmf_sps_prop_granular {
> diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
> index 959146fd483f..47ec563088b8 100644
> --- a/drivers/platform/x86/amd/pmf/spc.c
> +++ b/drivers/platform/x86/amd/pmf/spc.c
> @@ -44,6 +44,10 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *
>   	dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency);
>   	dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
>   	dev_dbg(dev->dev, "Connected Display Count: %u\n", in->ev_info.monitor_count);
> +	dev_dbg(dev->dev, "Primary Display Type: %s\n",
> +		drm_get_connector_type_name(in->ev_info.display_type));
> +	dev_dbg(dev->dev, "Primary Display State: %s\n", in->ev_info.display_state ?
> +			"Connected" : "disconnected/unknown");
>   	dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open");
>   	dev_dbg(dev->dev, "==== TA inputs END ====\n");
>   }
> @@ -146,6 +150,41 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
>   	return 0;
>   }
>   
> +static int amd_pmf_get_gfx_data(struct amd_pmf_dev *pmf)
> +{
> +	struct drm_connector_list_iter iter;
> +	struct drm_connector *connector;
> +	int i = 0;
> +
> +	/* Reset the count to zero */
> +	pmf->gfx_data.display_count = 0;
> +
> +	drm_connector_list_iter_begin(pmf->drm_dev, &iter);
> +	drm_for_each_connector_iter(connector, &iter) {
> +		if (connector->status == connector_status_connected)
> +			pmf->gfx_data.display_count++;
> +		if (connector->status != pmf->gfx_data.con_status[i])
> +			pmf->gfx_data.con_status[i] = connector->status;
> +		if (connector->connector_type != pmf->gfx_data.connector_type[i])
> +			pmf->gfx_data.connector_type[i] = connector->connector_type;
> +
> +		i++;
> +		if (i >= MAX_SUPPORTED_DISPLAY)
> +			break;
> +	}
> +	drm_connector_list_iter_end(&iter);
> +
> +	return 0;
> +}
> +
> +static void amd_pmf_get_gpu_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
> +{
> +	amd_pmf_get_gfx_data(dev);
> +	in->ev_info.monitor_count = dev->gfx_data.display_count;
> +	in->ev_info.display_type = dev->gfx_data.connector_type[0];
> +	in->ev_info.display_state = dev->gfx_data.con_status[0];

Can you elaborate on future expansion areas for the TA as it pertains to 
this info?

Do you think it's going to need information from more than just the 
first display?
Is monitor count boolean or does it actually care about the count > 1?
Is it *really* looking at whether the eDP is active (this is what would 
affect power most significantly IIUC)?

If this isn't an area that is likely to expand much in the future and is 
really "just about eDP", I wonder if the better answer is a DRM helper 
that is something like:

bool drm_edp_connected(void);

If it is likely to expand, it could be a set of multiple helpers.

> +}
> +
>   void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
>   {
>   	/* TA side lid open is 1 and close is 0, hence the ! here */
> @@ -154,4 +193,6 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
>   	amd_pmf_get_smu_info(dev, in);
>   	amd_pmf_get_battery_info(dev, in);
>   	amd_pmf_get_slider_info(dev, in);
> +	if (dev->drm_dev)
> +		amd_pmf_get_gpu_info(dev, in);
>   }
> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
> index 81b1bd356e83..82ee2b1c627f 100644
> --- a/drivers/platform/x86/amd/pmf/tee-if.c
> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
> @@ -10,6 +10,7 @@
>   
>   #include <linux/debugfs.h>
>   #include <linux/tee_drv.h>
> +#include <linux/thermal.h>
>   #include <linux/uuid.h>
>   #include "pmf.h"
>   
> @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
>   	tee_client_close_context(dev->tee_ctx);
>   }
>   
> +static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
> +				     unsigned long *state)
> +{
> +	struct backlight_device *bd;
> +
> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +		return -ENODEV;
> +
> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +	if (!bd)
> +		return -ENODEV;
> +
> +	*state = backlight_get_brightness(bd);
> +
> +	return 0;
> +}
> +
> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
> +				     unsigned long *state)
> +{
> +	struct backlight_device *bd;
> +
> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +		return -ENODEV;
> +
> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +	if (!bd)
> +		return -ENODEV;
> +
> +	if (backlight_is_blank(bd))
> +		*state = 0;
> +	else
> +		*state = bd->props.max_brightness;
> +
> +	return 0;
> +}
> +
> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
> +	.get_max_state = amd_pmf_gpu_get_max_state,
> +	.get_cur_state = amd_pmf_gpu_get_cur_state,
> +};
> +
> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
> +{
> +	struct amd_pmf_dev *dev = data;
> +
> +	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
> +		dev->gfx_data.gpu_dev = pdev;
> +		return 1; /* Stop walking */
> +	}
> +
> +	return 0; /* Continue walking */
> +}
> +
>   int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>   {
>   	int ret;
> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>   	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>   	amd_pmf_set_dram_addr(dev);
>   	amd_pmf_get_bios_buffer(dev);
> +
>   	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>   	if (!dev->prev_data)
>   		return -ENOMEM;
>   
> +	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
> +	if (dev->gfx_data.gpu_dev) {
> +		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);

I did see Hans' response; but I want to mention that specifically a good 
litmus test whether what you're doing to get the data from GPU device 
side is safe would be to ssh into the machine, stop the GUI and then 
unbind the PCI device from amdgpu driver and make sure nothing explodes.

If it does explode, take PMF out of the picture and see if it was caused 
by what you did or an existing problem.


> +		if (!dev->drm_dev)
> +			return -EINVAL;
> +
> +		if (acpi_video_get_backlight_type() != acpi_backlight_native)
> +			return -ENODEV;
> +
> +		dev->gfx_data.raw_bd = backlight_device_get_by_type(BACKLIGHT_RAW);
> +		if (!dev->gfx_data.raw_bd)
> +			return -ENODEV;
> +
> +		dev->gfx_data.cooling_dev = thermal_cooling_device_register("pmf_gpu_bd",
> +									    NULL, &bd_cooling_ops);
> +		if (IS_ERR(dev->gfx_data.cooling_dev))
> +			return -ENODEV;
> +	}
> +
>   	return dev->smart_pc_enabled;
>   }
>   
> @@ -448,5 +523,7 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
>   	kfree(dev->prev_data);
>   	kfree(dev->policy_buf);
>   	cancel_delayed_work_sync(&dev->pb_work);
> +	if (dev->gfx_data.cooling_dev)
> +		thermal_cooling_device_unregister(dev->gfx_data.cooling_dev);
>   	amd_pmf_tee_deinit(dev);
>   }
Shyam Sundar S K Nov. 20, 2023, 6:39 a.m. UTC | #5
On 11/17/2023 11:43 PM, Mario Limonciello wrote:
> On 11/17/2023 02:07, Shyam Sundar S K wrote:
>> In order to provide GPU inputs to TA for the Smart PC solution to
>> work, we
>> need to have interface between the PMF driver and the AMDGPU driver.
>>
>> Add the initial code path for get interface from AMDGPU.
> 
> Make sure you update the commit message when you resumbit v6.
> 
>>
>> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
>> ---
>>   drivers/platform/x86/amd/pmf/core.c   |  3 +-
>>   drivers/platform/x86/amd/pmf/pmf.h    | 18 +++++++
>>   drivers/platform/x86/amd/pmf/spc.c    | 41 ++++++++++++++
>>   drivers/platform/x86/amd/pmf/tee-if.c | 77
>> +++++++++++++++++++++++++++
>>   4 files changed, 138 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/platform/x86/amd/pmf/core.c
>> b/drivers/platform/x86/amd/pmf/core.c
>> index 4b8156033fa6..9b14a997cd48 100644
>> --- a/drivers/platform/x86/amd/pmf/core.c
>> +++ b/drivers/platform/x86/amd/pmf/core.c
>> @@ -411,6 +411,7 @@ static int amd_pmf_probe(struct platform_device
>> *pdev)
>>       }
>>         dev->cpu_id = rdev->device;
>> +    dev->root = rdev;
>>         err = amd_smn_read(0, AMD_PMF_BASE_ADDR_LO, &val);
>>       if (err) {
>> @@ -482,4 +483,4 @@ module_platform_driver(amd_pmf_driver);
>>     MODULE_LICENSE("GPL");
>>   MODULE_DESCRIPTION("AMD Platform Management Framework Driver");
>> -MODULE_SOFTDEP("pre: amdtee");
>> +MODULE_SOFTDEP("pre: amdtee amdgpu");
>> diff --git a/drivers/platform/x86/amd/pmf/pmf.h
>> b/drivers/platform/x86/amd/pmf/pmf.h
>> index 8712299ad52b..525308519fa3 100644
>> --- a/drivers/platform/x86/amd/pmf/pmf.h
>> +++ b/drivers/platform/x86/amd/pmf/pmf.h
>> @@ -11,7 +11,11 @@
>>   #ifndef PMF_H
>>   #define PMF_H
>>   +#include <acpi/video.h>
>> +#include <drm/drm_connector.h>
>>   #include <linux/acpi.h>
>> +#include <linux/backlight.h>
>> +#include <linux/pci.h>
>>   #include <linux/platform_profile.h>
>>     #define POLICY_BUF_MAX_SZ        0x4b000
>> @@ -83,6 +87,8 @@
>>   #define TA_OUTPUT_RESERVED_MEM                906
>>   #define MAX_OPERATION_PARAMS                    4
>>   +#define MAX_SUPPORTED_DISPLAY        4
>> +
>>   /* AMD PMF BIOS interfaces */
>>   struct apmf_verify_interface {
>>       u16 size;
>> @@ -194,6 +200,15 @@ enum power_modes {
>>       POWER_MODE_MAX,
>>   };
>>   +struct amd_gpu_pmf_data {
>> +    struct pci_dev *gpu_dev;
>> +    struct backlight_device *raw_bd;
>> +    struct thermal_cooling_device *cooling_dev;
>> +    enum drm_connector_status con_status[MAX_SUPPORTED_DISPLAY];
>> +    int display_count;
>> +    int connector_type[MAX_SUPPORTED_DISPLAY];
>> +};
>> +
>>   struct amd_pmf_dev {
>>       void __iomem *regbase;
>>       void __iomem *smu_virt_addr;
>> @@ -228,9 +243,12 @@ struct amd_pmf_dev {
>>       void *shbuf;
>>       struct delayed_work pb_work;
>>       struct pmf_action_table *prev_data;
>> +    struct amd_gpu_pmf_data gfx_data;
>>       u64 policy_addr;
>>       void *policy_base;
>>       bool smart_pc_enabled;
>> +    struct pci_dev *root;
>> +    struct drm_device *drm_dev;
>>   };
>>     struct apmf_sps_prop_granular {
>> diff --git a/drivers/platform/x86/amd/pmf/spc.c
>> b/drivers/platform/x86/amd/pmf/spc.c
>> index 959146fd483f..47ec563088b8 100644
>> --- a/drivers/platform/x86/amd/pmf/spc.c
>> +++ b/drivers/platform/x86/amd/pmf/spc.c
>> @@ -44,6 +44,10 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev
>> *dev, struct ta_pmf_enact_table *
>>       dev_dbg(dev->dev, "Max C0 Residency: %u\n",
>> in->ev_info.max_c0residency);
>>       dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
>>       dev_dbg(dev->dev, "Connected Display Count: %u\n",
>> in->ev_info.monitor_count);
>> +    dev_dbg(dev->dev, "Primary Display Type: %s\n",
>> +        drm_get_connector_type_name(in->ev_info.display_type));
>> +    dev_dbg(dev->dev, "Primary Display State: %s\n",
>> in->ev_info.display_state ?
>> +            "Connected" : "disconnected/unknown");
>>       dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ?
>> "close" : "open");
>>       dev_dbg(dev->dev, "==== TA inputs END ====\n");
>>   }
>> @@ -146,6 +150,41 @@ static int amd_pmf_get_slider_info(struct
>> amd_pmf_dev *dev, struct ta_pmf_enact_
>>       return 0;
>>   }
>>   +static int amd_pmf_get_gfx_data(struct amd_pmf_dev *pmf)
>> +{
>> +    struct drm_connector_list_iter iter;
>> +    struct drm_connector *connector;
>> +    int i = 0;
>> +
>> +    /* Reset the count to zero */
>> +    pmf->gfx_data.display_count = 0;
>> +
>> +    drm_connector_list_iter_begin(pmf->drm_dev, &iter);
>> +    drm_for_each_connector_iter(connector, &iter) {
>> +        if (connector->status == connector_status_connected)
>> +            pmf->gfx_data.display_count++;
>> +        if (connector->status != pmf->gfx_data.con_status[i])
>> +            pmf->gfx_data.con_status[i] = connector->status;
>> +        if (connector->connector_type !=
>> pmf->gfx_data.connector_type[i])
>> +            pmf->gfx_data.connector_type[i] =
>> connector->connector_type;
>> +
>> +        i++;
>> +        if (i >= MAX_SUPPORTED_DISPLAY)
>> +            break;
>> +    }
>> +    drm_connector_list_iter_end(&iter);
>> +
>> +    return 0;
>> +}
>> +
>> +static void amd_pmf_get_gpu_info(struct amd_pmf_dev *dev, struct
>> ta_pmf_enact_table *in)
>> +{
>> +    amd_pmf_get_gfx_data(dev);
>> +    in->ev_info.monitor_count = dev->gfx_data.display_count;
>> +    in->ev_info.display_type = dev->gfx_data.connector_type[0];
>> +    in->ev_info.display_state = dev->gfx_data.con_status[0];
> 
> Can you elaborate on future expansion areas for the TA as it pertains
> to this info?
> 

For now, the planned items are:
- GPU Engine Running Time
- GPU Engine Utilization Percent
- GPU Workload type
- GPU Power

more items are still being worked out from the TA. So definitely there
is a need to build the pipe to get information from amdgpu.

> Do you think it's going to need information from more than just the
> first display?

Yes. We are also looking at whether there is an extended display. In the
future, we can also check whether that display is behind a dock, etc.

But to make it simple, today TA only looks if there is a first display.

> Is monitor count boolean or does it actually care about the count > 1?
> Is it *really* looking at whether the eDP is active (this is what
> would affect power most signifiantly IIUC)?
> 

Not Boolean.

> If this isn't an area that is likely to expand much in the future and
> is really "just about eDP", I wonder if the better answer is a DRM
> helper that is something like:
> 
> bool drm_edp_connected(void);
> 
> If it is likely to expand, it could be a set of multiple helpers.
> 

Ok. Thanks. I will see how it gets shaped up.

>> +}
>> +
>>   void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct
>> ta_pmf_enact_table *in)
>>   {
>>       /* TA side lid open is 1 and close is 0, hence the ! here */
>> @@ -154,4 +193,6 @@ void amd_pmf_populate_ta_inputs(struct
>> amd_pmf_dev *dev, struct ta_pmf_enact_tab
>>       amd_pmf_get_smu_info(dev, in);
>>       amd_pmf_get_battery_info(dev, in);
>>       amd_pmf_get_slider_info(dev, in);
>> +    if (dev->drm_dev)
>> +        amd_pmf_get_gpu_info(dev, in);
>>   }
>> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c
>> b/drivers/platform/x86/amd/pmf/tee-if.c
>> index 81b1bd356e83..82ee2b1c627f 100644
>> --- a/drivers/platform/x86/amd/pmf/tee-if.c
>> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
>> @@ -10,6 +10,7 @@
>>     #include <linux/debugfs.h>
>>   #include <linux/tee_drv.h>
>> +#include <linux/thermal.h>
>>   #include <linux/uuid.h>
>>   #include "pmf.h"
>>   @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct
>> amd_pmf_dev *dev)
>>       tee_client_close_context(dev->tee_ctx);
>>   }
>>   +static int amd_pmf_gpu_get_cur_state(struct
>> thermal_cooling_device *cooling_dev,
>> +                     unsigned long *state)
>> +{
>> +    struct backlight_device *bd;
>> +
>> +    if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +        return -ENODEV;
>> +
>> +    bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>> +    if (!bd)
>> +        return -ENODEV;
>> +
>> +    *state = backlight_get_brightness(bd);
>> +
>> +    return 0;
>> +}
>> +
>> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device
>> *cooling_dev,
>> +                     unsigned long *state)
>> +{
>> +    struct backlight_device *bd;
>> +
>> +    if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +        return -ENODEV;
>> +
>> +    bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>> +    if (!bd)
>> +        return -ENODEV;
>> +
>> +    if (backlight_is_blank(bd))
>> +        *state = 0;
>> +    else
>> +        *state = bd->props.max_brightness;
>> +
>> +    return 0;
>> +}
>> +
>> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
>> +    .get_max_state = amd_pmf_gpu_get_max_state,
>> +    .get_cur_state = amd_pmf_gpu_get_cur_state,
>> +};
>> +
>> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
>> +{
>> +    struct amd_pmf_dev *dev = data;
>> +
>> +    if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
>> +        dev->gfx_data.gpu_dev = pdev;
>> +        return 1; /* Stop walking */
>> +    }
>> +
>> +    return 0; /* Continue walking */
>> +}
>> +
>>   int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>   {
>>       int ret;
>> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev
>> *dev)
>>       INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>>       amd_pmf_set_dram_addr(dev);
>>       amd_pmf_get_bios_buffer(dev);
>> +
>>       dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>>       if (!dev->prev_data)
>>           return -ENOMEM;
>>   +    pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
>> +    if (dev->gfx_data.gpu_dev) {
>> +        dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
> 
> I did see Hans response; but I want to mention that specifically a
> good litmus test whether what you're doing to get the data from GPU
> device side is safe would be to ssh into the machine, stop the GUI and
> then unbind the PCI device from amdgpu driver and make sure nothing
> explodes.
> 
> If it does explode, take PMF out of the picture and see if it was
> caused by what you did or an existing problem.
> 

Ok. I will take a look.

Thanks,
Shyam

> 
>> +        if (!dev->drm_dev)
>> +            return -EINVAL;
>> +
>> +        if (acpi_video_get_backlight_type() != acpi_backlight_native)
>> +            return -ENODEV;
>> +
>> +        dev->gfx_data.raw_bd =
>> backlight_device_get_by_type(BACKLIGHT_RAW);
>> +        if (!dev->gfx_data.raw_bd)
>> +            return -ENODEV;
>> +
>> +        dev->gfx_data.cooling_dev =
>> thermal_cooling_device_register("pmf_gpu_bd",
>> +                                        NULL, &bd_cooling_ops);
>> +        if (IS_ERR(dev->gfx_data.cooling_dev))
>> +            return -ENODEV;
>> +    }
>> +
>>       return dev->smart_pc_enabled;
>>   }
>>   @@ -448,5 +523,7 @@ void amd_pmf_deinit_smart_pc(struct
>> amd_pmf_dev *dev)
>>       kfree(dev->prev_data);
>>       kfree(dev->policy_buf);
>>       cancel_delayed_work_sync(&dev->pb_work);
>> +    if (dev->gfx_data.cooling_dev)
>> +        thermal_cooling_device_unregister(dev->gfx_data.cooling_dev);
>>       amd_pmf_tee_deinit(dev);
>>   }
>
Shyam Sundar S K Nov. 20, 2023, 6:42 a.m. UTC | #6
Hi Hans,

On 11/17/2023 5:01 PM, Hans de Goede wrote:
> Hi,
> 
> On 11/17/23 12:08, Shyam Sundar S K wrote:
>> Adding AMDGPU folks (Alex and Christian) - I had to drop them as the
>> changes from amdgpu were moved to PMF driver.
>>
>> Hi Hans,
>>
>>
>> On 11/17/2023 4:18 PM, Hans de Goede wrote:
>>> Hi Shyam,
>>>
>>> On 11/17/23 09:07, Shyam Sundar S K wrote:
>>>> In order to provide GPU inputs to TA for the Smart PC solution to work, we
>>>> need to have interface between the PMF driver and the AMDGPU driver.
>>>>
>>>> Add the initial code path for get interface from AMDGPU.
>>>>
>>>> Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
>>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>>> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> 
> <snip>
> 
>>>> diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
>>>> index 81b1bd356e83..82ee2b1c627f 100644
>>>> --- a/drivers/platform/x86/amd/pmf/tee-if.c
>>>> +++ b/drivers/platform/x86/amd/pmf/tee-if.c
>>>> @@ -10,6 +10,7 @@
>>>>  
>>>>  #include <linux/debugfs.h>
>>>>  #include <linux/tee_drv.h>
>>>> +#include <linux/thermal.h>
>>>>  #include <linux/uuid.h>
>>>>  #include "pmf.h"
>>>>  
>>>> @@ -422,6 +423,60 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
>>>>  	tee_client_close_context(dev->tee_ctx);
>>>>  }
>>>>  
>>>> +static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
>>>> +				     unsigned long *state)
>>>> +{
>>>> +	struct backlight_device *bd;
>>>> +
>>>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>>>> +		return -ENODEV;
>>>> +
>>>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>>>> +	if (!bd)
>>>> +		return -ENODEV;
>>>> +
>>>> +	*state = backlight_get_brightness(bd);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
>>>> +				     unsigned long *state)
>>>> +{
>>>> +	struct backlight_device *bd;
>>>> +
>>>> +	if (acpi_video_get_backlight_type() != acpi_backlight_native)
>>>> +		return -ENODEV;
>>>> +
>>>> +	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
>>>> +	if (!bd)
>>>> +		return -ENODEV;
>>>> +
>>>> +	if (backlight_is_blank(bd))
>>>> +		*state = 0;
>>>> +	else
>>>> +		*state = bd->props.max_brightness;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static const struct thermal_cooling_device_ops bd_cooling_ops = {
>>>> +	.get_max_state = amd_pmf_gpu_get_max_state,
>>>> +	.get_cur_state = amd_pmf_gpu_get_cur_state,
>>>> +};
>>>
>>> This is still the wrong thing to do. The new PMF code MUST only
>>> be a *consumer* of the thermal_cooling_device API.
>>>
>>> The thermal-cooling device for the backlight itself MUST be
>>> registered by the amdgpu driver.
>>>
>>> I believe that the correct fix for this is to add a new flag to
>>> the backlight_properties struct;
>>> And then make backlight_device_register() register
>>> a thermal_cooling_device for the backlight when this new flag is set.
>>>
>>> This way we get a nice generic way to use backlight class devices
>>> as thermal cooling devices and you can make the amdgpu driver
>>> register the thermal cooling device by simply adding the new flag
>>> to the backlight_properties struct used in the amdgpu driver.
>>
>> Agreed. I am also of the same opinion.
> 
> So the change to the AMDGPU driver here would just be setting
> this one new flag in the backlight_properties struct.
> 
> Alex, Christian, would that be acceptable to you ?
> 
> 
>>>> +static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
>>>> +{
>>>> +	struct amd_pmf_dev *dev = data;
>>>> +
>>>> +	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
>>>> +		dev->gfx_data.gpu_dev = pdev;
>>>> +		return 1; /* Stop walking */
>>>> +	}
>>>> +
>>>> +	return 0; /* Continue walking */
>>>> +}
>>>> +
>>>>  int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>>>  {
>>>>  	int ret;
>>>> @@ -433,10 +488,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
>>>>  	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
>>>>  	amd_pmf_set_dram_addr(dev);
>>>>  	amd_pmf_get_bios_buffer(dev);
>>>> +
>>>>  	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
>>>>  	if (!dev->prev_data)
>>>>  		return -ENOMEM;
>>>>  
>>>> +	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
>>>> +	if (dev->gfx_data.gpu_dev) {
>>>> +		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
>>>> +		if (!dev->drm_dev)
>>>> +			return -EINVAL;
>>>
>>> You cannot just call pci_get_drvdata() on a device for which you
>>> are not the driver. You have no idea of the lifetime of this device,
>>> the driver may unbind and release the memory pci_get_drvdata()
>>> points to, leaving this code with a dangling pointer which will
>>> crash the kernel the first time you try to use it.
>>>
>>> Also since you are not the owner you MUST not assume any specific
>>> type for this memory, you cannot be sure this actually is of
>>> the type drm_device. Basically you MUST not touch another
>>> driver's drvdata *at all*.
>>>
>>> The proper way to fix this would be to add some API to the DRM
>>> subsystem to query the information you are looking for form
>>> the DRM subsystem.
>>>
>>> Poking directly inside other driver's internals is NOT acceptable,
>>> NACK for this patch.
>>>
>>
>> I am inline with your remarks, but I could find a way where the
>> thermal driver registration, handling the backlight without having the
>> changes in the amdgpu driver very tricky.
> 
> As mentioned above I think there should be generic thermal cooling
> device support added to drivers/video/backlight/backlight.c, then
> the amdgpu code just needs to pass a flag when registering
> the backlight to enable this.
> 
>> Like you said, I am also of the same opinion that PMF driver should
>> call the DRM/GPU subsystem APIs (like it does with other subsystems).
>>
>> But Christian had concerns on adding all of these into the GPU driver.
>> So I had to roll back these into the PMF driver, and hence you see a
>> pci_get_drvdata() call.
> 
> Ok, so I can see how this AMD PMF code is all kinda special
> and how the DRM folks don't want to have to add APIs just for
> that. But IMHO calling pci_get_drvdata() on an unowned
> device is completely unacceptable.
> 
> At a minimum we need life-cycle management for the drm_device
> which the PMF code is using, something like:
> 
> struct drm_device *drm_device_find(const void *data,
>    int (*match)(struct drm_device *dev, const void *data));
> 
> which works similar to class_find_device() and returns
> a reference to the drm_device for which match has returned 0
> (which also stops it from looping over devices).
> 
> Combined with a:
> 
> void drm_device_put(struct drm_device *dev);
> 
> for when the PMF code is done with the device.
> 
> This way the PMF code can safely get a reference to drm_device
> and release it when it is done. Rather then just getting
> some random pointer which may or not actually be a drm_device
> and the lifetime of which is not guaranteed in anyway.
> 
> E.g. if the PMF driver loads before amdgpu then
> pci_get_drvdata() will just return NULL.
> 
> And as mentioned if the amdgpu driver gets unbound after
> the PMF code has called  pci_get_drvdata() the PMF driver
> now has a dangling pointer.
> 
> So IMHO adding: drm_device_find() + drm_device_put()
> to the DRM core are minimum which is necessary here.
> 
> If the PMF code then ends up doing things like
> drm_for_each_connector_iter() on the gotten drm_device
> referemce so be it. But we must make sure we have
> a properly lifecycle managed reference first and
> pci_get_drvdata() does not give us that.
> 

Ok I will work on your feedback.

>> For the sake of simplicity, I can drop patches 13/17 and 14/17 from
>> the series and send them separately later.
> 
> Yes I think that dropping the GPU related patches for
> now would be best.
> 

Thank you. Let me wait for feedback from others before I drop the GPU
patches.

Thanks,
Shyam

> Regards,
> 
> Hans
> 
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index 4b8156033fa6..9b14a997cd48 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -411,6 +411,7 @@  static int amd_pmf_probe(struct platform_device *pdev)
 	}
 
 	dev->cpu_id = rdev->device;
+	dev->root = rdev;
 
 	err = amd_smn_read(0, AMD_PMF_BASE_ADDR_LO, &val);
 	if (err) {
@@ -482,4 +483,4 @@  module_platform_driver(amd_pmf_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("AMD Platform Management Framework Driver");
-MODULE_SOFTDEP("pre: amdtee");
+MODULE_SOFTDEP("pre: amdtee amdgpu");
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 8712299ad52b..525308519fa3 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -11,7 +11,11 @@ 
 #ifndef PMF_H
 #define PMF_H
 
+#include <acpi/video.h>
+#include <drm/drm_connector.h>
 #include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/pci.h>
 #include <linux/platform_profile.h>
 
 #define POLICY_BUF_MAX_SZ		0x4b000
@@ -83,6 +87,8 @@ 
 #define TA_OUTPUT_RESERVED_MEM				906
 #define MAX_OPERATION_PARAMS					4
 
+#define MAX_SUPPORTED_DISPLAY		4
+
 /* AMD PMF BIOS interfaces */
 struct apmf_verify_interface {
 	u16 size;
@@ -194,6 +200,15 @@  enum power_modes {
 	POWER_MODE_MAX,
 };
 
+struct amd_gpu_pmf_data {
+	struct pci_dev *gpu_dev;
+	struct backlight_device *raw_bd;
+	struct thermal_cooling_device *cooling_dev;
+	enum drm_connector_status con_status[MAX_SUPPORTED_DISPLAY];
+	int display_count;
+	int connector_type[MAX_SUPPORTED_DISPLAY];
+};
+
 struct amd_pmf_dev {
 	void __iomem *regbase;
 	void __iomem *smu_virt_addr;
@@ -228,9 +243,12 @@  struct amd_pmf_dev {
 	void *shbuf;
 	struct delayed_work pb_work;
 	struct pmf_action_table *prev_data;
+	struct amd_gpu_pmf_data gfx_data;
 	u64 policy_addr;
 	void *policy_base;
 	bool smart_pc_enabled;
+	struct pci_dev *root;
+	struct drm_device *drm_dev;
 };
 
 struct apmf_sps_prop_granular {
diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
index 959146fd483f..47ec563088b8 100644
--- a/drivers/platform/x86/amd/pmf/spc.c
+++ b/drivers/platform/x86/amd/pmf/spc.c
@@ -44,6 +44,10 @@  void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *
 	dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency);
 	dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy);
 	dev_dbg(dev->dev, "Connected Display Count: %u\n", in->ev_info.monitor_count);
+	dev_dbg(dev->dev, "Primary Display Type: %s\n",
+		drm_get_connector_type_name(in->ev_info.display_type));
+	dev_dbg(dev->dev, "Primary Display State: %s\n", in->ev_info.display_state ?
+			"Connected" : "disconnected/unknown");
 	dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open");
 	dev_dbg(dev->dev, "==== TA inputs END ====\n");
 }
@@ -146,6 +150,41 @@  static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
 	return 0;
 }
 
+static int amd_pmf_get_gfx_data(struct amd_pmf_dev *pmf)
+{
+	struct drm_connector_list_iter iter;
+	struct drm_connector *connector;
+	int i = 0;
+
+	/* Reset the count to zero */
+	pmf->gfx_data.display_count = 0;
+
+	drm_connector_list_iter_begin(pmf->drm_dev, &iter);
+	drm_for_each_connector_iter(connector, &iter) {
+		if (connector->status == connector_status_connected)
+			pmf->gfx_data.display_count++;
+		if (connector->status != pmf->gfx_data.con_status[i])
+			pmf->gfx_data.con_status[i] = connector->status;
+		if (connector->connector_type != pmf->gfx_data.connector_type[i])
+			pmf->gfx_data.connector_type[i] = connector->connector_type;
+
+		i++;
+		if (i >= MAX_SUPPORTED_DISPLAY)
+			break;
+	}
+	drm_connector_list_iter_end(&iter);
+
+	return 0;
+}
+
+static void amd_pmf_get_gpu_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
+{
+	amd_pmf_get_gfx_data(dev);
+	in->ev_info.monitor_count = dev->gfx_data.display_count;
+	in->ev_info.display_type = dev->gfx_data.connector_type[0];
+	in->ev_info.display_state = dev->gfx_data.con_status[0];
+}
+
 void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in)
 {
 	/* TA side lid open is 1 and close is 0, hence the ! here */
@@ -154,4 +193,6 @@  void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
 	amd_pmf_get_smu_info(dev, in);
 	amd_pmf_get_battery_info(dev, in);
 	amd_pmf_get_slider_info(dev, in);
+	if (dev->drm_dev)
+		amd_pmf_get_gpu_info(dev, in);
 }
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 81b1bd356e83..82ee2b1c627f 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -10,6 +10,7 @@ 
 
 #include <linux/debugfs.h>
 #include <linux/tee_drv.h>
+#include <linux/thermal.h>
 #include <linux/uuid.h>
 #include "pmf.h"
 
@@ -422,6 +423,60 @@  static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
 	tee_client_close_context(dev->tee_ctx);
 }
 
+static int amd_pmf_gpu_get_cur_state(struct thermal_cooling_device *cooling_dev,
+				     unsigned long *state)
+{
+	struct backlight_device *bd;
+
+	if (acpi_video_get_backlight_type() != acpi_backlight_native)
+		return -ENODEV;
+
+	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+	if (!bd)
+		return -ENODEV;
+
+	*state = backlight_get_brightness(bd);
+
+	return 0;
+}
+
+static int amd_pmf_gpu_get_max_state(struct thermal_cooling_device *cooling_dev,
+				     unsigned long *state)
+{
+	struct backlight_device *bd;
+
+	if (acpi_video_get_backlight_type() != acpi_backlight_native)
+		return -ENODEV;
+
+	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+	if (!bd)
+		return -ENODEV;
+
+	if (backlight_is_blank(bd))
+		*state = 0;
+	else
+		*state = bd->props.max_brightness;
+
+	return 0;
+}
+
+static const struct thermal_cooling_device_ops bd_cooling_ops = {
+	.get_max_state = amd_pmf_gpu_get_max_state,
+	.get_cur_state = amd_pmf_gpu_get_cur_state,
+};
+
+static int amd_pmf_get_gpu_handle(struct pci_dev *pdev, void *data)
+{
+	struct amd_pmf_dev *dev = data;
+
+	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->devfn == 0) {
+		dev->gfx_data.gpu_dev = pdev;
+		return 1; /* Stop walking */
+	}
+
+	return 0; /* Continue walking */
+}
+
 int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
 {
 	int ret;
@@ -433,10 +488,30 @@  int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
 	INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
 	amd_pmf_set_dram_addr(dev);
 	amd_pmf_get_bios_buffer(dev);
+
 	dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
 	if (!dev->prev_data)
 		return -ENOMEM;
 
+	pci_walk_bus(dev->root->bus, amd_pmf_get_gpu_handle, dev);
+	if (dev->gfx_data.gpu_dev) {
+		dev->drm_dev = pci_get_drvdata(dev->gfx_data.gpu_dev);
+		if (!dev->drm_dev)
+			return -EINVAL;
+
+		if (acpi_video_get_backlight_type() != acpi_backlight_native)
+			return -ENODEV;
+
+		dev->gfx_data.raw_bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+		if (!dev->gfx_data.raw_bd)
+			return -ENODEV;
+
+		dev->gfx_data.cooling_dev = thermal_cooling_device_register("pmf_gpu_bd",
+									    NULL, &bd_cooling_ops);
+		if (IS_ERR(dev->gfx_data.cooling_dev))
+			return -ENODEV;
+	}
+
 	return dev->smart_pc_enabled;
 }
 
@@ -448,5 +523,7 @@  void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
 	kfree(dev->prev_data);
 	kfree(dev->policy_buf);
 	cancel_delayed_work_sync(&dev->pb_work);
+	if (dev->gfx_data.cooling_dev)
+		thermal_cooling_device_unregister(dev->gfx_data.cooling_dev);
 	amd_pmf_tee_deinit(dev);
 }