diff mbox series

[v4,7/8] thermal: Add PCIe cooling driver

Message ID 20240105112547.7301-8-ilpo.jarvinen@linux.intel.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Add PCIe bandwidth controller | expand

Commit Message

Ilpo Järvinen Jan. 5, 2024, 11:25 a.m. UTC
Add a thermal cooling driver to provide a path to access the PCIe bandwidth
controller using the usual thermal interfaces.

A cooling device is instantiated for controllable PCIe Ports from the
bwctrl service driver.

On the thermal side, state 0 means no throttling, i.e., the maximum
supported PCIe Link Speed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # From the cooling device interface perspective
---
 MAINTAINERS                    |   1 +
 drivers/pci/pcie/bwctrl.c      |   6 ++
 drivers/thermal/Kconfig        |  10 +++
 drivers/thermal/Makefile       |   2 +
 drivers/thermal/pcie_cooling.c | 107 +++++++++++++++++++++++++++++++++
 include/linux/pci-bwctrl.h     |  16 +++++
 6 files changed, 142 insertions(+)
 create mode 100644 drivers/thermal/pcie_cooling.c

Comments

Christophe JAILLET Jan. 5, 2024, 11:56 a.m. UTC | #1
Le 05/01/2024 à 12:25, Ilpo Järvinen a écrit :
> Add a thermal cooling driver to provide path to access PCIe bandwidth
> controller using the usual thermal interfaces.
> 
> A cooling device is instantiated for controllable PCIe Ports from the
> bwctrl service driver.
> 
> The thermal side state 0 means no throttling, i.e., maximum supported
> PCIe Link Speed.
> 
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Acked-by: Rafael J. Wysocki <rafael@kernel.org> # From the cooling device interface perspective
> ---

...

> +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
> +							    struct pcie_device *pdev)
> +{
> +	struct pcie_cooling_device *pcie_cdev;
> +	struct thermal_cooling_device *cdev;
> +	size_t name_len;
> +	char *name;
> +
> +	pcie_cdev = kzalloc(sizeof(*pcie_cdev), GFP_KERNEL);
> +	if (!pcie_cdev)
> +		return ERR_PTR(-ENOMEM);
> +
> +	pcie_cdev->port = port;
> +	pcie_cdev->pdev = pdev;
> +
> +	name_len = strlen(COOLING_DEV_TYPE_PREFIX) + strlen(pci_name(port)) + 1;
> +	name = kzalloc(name_len, GFP_KERNEL);
> +	if (!name) {
> +		kfree(pcie_cdev);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	snprintf(name, name_len, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));

Nit: kasprintf() ?

> +	cdev = thermal_cooling_device_register(name, pcie_cdev, &pcie_cooling_ops);
> +	kfree(name);
> +
> +	return cdev;
> +}
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index ccc5d50bf340..0cacc7e777c5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16771,6 +16771,7 @@  M:	Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	drivers/pci/pcie/bwctrl.c
+F:	drivers/thermal/pcie_cooling.c
 F:	include/linux/pci-bwctrl.h
 
 PCIE DRIVER FOR AMAZON ANNAPURNA LABS
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index e17c98c25f06..6c50fabfea39 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -35,9 +35,11 @@ 
 /**
  * struct pcie_bwctrl_service_data - PCIe bandwidth controller
  * @set_speed_mutex: serializes link speed changes
+ * @cdev: thermal cooling device associated with the port
  */
 struct pcie_bwctrl_service_data {
 	struct mutex set_speed_mutex;
+	struct thermal_cooling_device *cdev;
 };
 
 static bool pcie_valid_speed(enum pci_bus_speed speed)
@@ -224,6 +226,8 @@  static int pcie_bwnotif_probe(struct pcie_device *srv)
 	pcie_bwnotif_enable(port);
 	pci_info(port, "enabled with IRQ %d\n", srv->irq);
 
+	data->cdev = pcie_cooling_device_register(port, srv);
+
 	return 0;
 }
 
@@ -231,6 +235,8 @@  static void pcie_bwnotif_remove(struct pcie_device *srv)
 {
 	struct pcie_bwctrl_service_data *data = get_service_data(srv);
 
+	if (data->cdev)
+		pcie_cooling_device_unregister(data->cdev);
 	pcie_bwnotif_disable(srv->port);
 	mutex_destroy(&data->set_speed_mutex);
 }
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index c81a00fbca7d..3a071396f1c6 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -219,6 +219,16 @@  config DEVFREQ_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config PCIE_THERMAL
+	bool "PCIe cooling support"
+	depends on PCIEPORTBUS
+	select PCIE_BW
+	help
+	  This implements a PCIe cooling mechanism through bandwidth reduction
+	  for PCIe devices.
+
+	  If you want this support, you should say Y here.
+
 config THERMAL_EMULATION
 	bool "Thermal emulation mode support"
 	help
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index c934cab309ae..a0b25a2742b7 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -30,6 +30,8 @@  thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)	+= cpuidle_cooling.o
 # devfreq cooling
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
+thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o
+
 obj-$(CONFIG_K3_THERMAL)	+= k3_bandgap.o k3_j72xx_bandgap.o
 # platform thermal drivers
 obj-y				+= broadcom/
diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c
new file mode 100644
index 000000000000..5fc52d410161
--- /dev/null
+++ b/drivers/thermal/pcie_cooling.c
@@ -0,0 +1,107 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe cooling device
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+
+#include <linux/build_bug.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-bwctrl.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/thermal.h>
+
+#define COOLING_DEV_TYPE_PREFIX		"PCIe_Port_Link_Speed_"
+
+/**
+ * struct pcie_cooling_device - per-port state handed to the thermal core
+ * @port: PCIe Port whose subordinate bus speeds are translated to states
+ * @pdev: port service device passed to pcie_bwctrl_set_current_speed()
+ */
+struct pcie_cooling_device {
+	struct pci_dev *port;
+	struct pcie_device *pdev;
+};
+
+/*
+ * .get_max_state callback: report the highest cooling state of the port.
+ *
+ * The state is the number of speed steps between PCIE_SPEED_2_5GT and the
+ * subordinate bus' max_bus_speed, which makes cooling state 0 equal to the
+ * maximum PCIe speed. Always returns 0.
+ */
+static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+	struct pci_bus *bus = pcie_cdev->port->subordinate;
+
+	*state = bus->max_bus_speed - PCIE_SPEED_2_5GT;
+
+	return 0;
+}
+
+/*
+ * .get_cur_state callback: translate the current bus speed into a state.
+ *
+ * States count downwards from the maximum speed, so the number of speed
+ * steps above PCIE_SPEED_2_5GT is subtracted from cdev->max_state.
+ * Always returns 0.
+ */
+static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+	unsigned long speed_steps;
+
+	speed_steps = pcie_cdev->port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT;
+	*state = cdev->max_state - speed_steps;
+
+	return 0;
+}
+
+/*
+ * .set_cur_state callback: request the link speed matching @state.
+ *
+ * Inverse of the get_cur_state mapping: state 0 selects the maximum speed
+ * and every further state is one speed step lower. Returns whatever
+ * pcie_bwctrl_set_current_speed() returns.
+ */
+static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+	unsigned long steps_above_min = cdev->max_state - state;
+	enum pci_bus_speed speed = steps_above_min + PCIE_SPEED_2_5GT;
+
+	return pcie_bwctrl_set_current_speed(pcie_cdev->pdev, speed);
+}
+
+/*
+ * Cooling device callbacks. The table is never modified after build time
+ * and thermal_cooling_device_register() takes a const pointer, so keep it
+ * in read-only data.
+ */
+static const struct thermal_cooling_device_ops pcie_cooling_ops = {
+	.get_max_state = pcie_cooling_get_max_level,
+	.get_cur_state = pcie_cooling_get_cur_level,
+	.set_cur_state = pcie_cooling_set_cur_level,
+};
+
+/**
+ * pcie_cooling_device_register - create a cooling device for a PCIe Port
+ * @port: PCIe Port whose link speed is controlled
+ * @pdev: port service device used for the speed change requests
+ *
+ * The cooling device type is COOLING_DEV_TYPE_PREFIX followed by the PCI
+ * name of @port.
+ *
+ * Return: the registered cooling device, or an ERR_PTR() on failure. The
+ * private data is freed on every failure path, so nothing is left for the
+ * caller to clean up when an error pointer is returned.
+ */
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
+							    struct pcie_device *pdev)
+{
+	struct pcie_cooling_device *pcie_cdev;
+	struct thermal_cooling_device *cdev;
+	char *name;
+
+	pcie_cdev = kzalloc(sizeof(*pcie_cdev), GFP_KERNEL);
+	if (!pcie_cdev)
+		return ERR_PTR(-ENOMEM);
+
+	pcie_cdev->port = port;
+	pcie_cdev->pdev = pdev;
+
+	name = kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));
+	if (!name) {
+		cdev = ERR_PTR(-ENOMEM);
+		goto err_free;
+	}
+
+	cdev = thermal_cooling_device_register(name, pcie_cdev, &pcie_cooling_ops);
+	/* The temporary name is not needed once the registration call returned */
+	kfree(name);
+	if (IS_ERR(cdev))
+		goto err_free;
+
+	return cdev;
+
+err_free:
+	/* Registration failed: nobody else references pcie_cdev, don't leak it */
+	kfree(pcie_cdev);
+	return cdev;
+}
+
+/**
+ * pcie_cooling_device_unregister - tear down a PCIe cooling device
+ * @cdev: value previously returned by pcie_cooling_device_register()
+ *
+ * pcie_cooling_device_register() may return an ERR_PTR() and callers may
+ * only test the stored pointer against NULL, so both NULL and error
+ * pointers are accepted and ignored here instead of being dereferenced.
+ */
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+	struct pcie_cooling_device *pcie_cdev;
+
+	if (IS_ERR_OR_NULL(cdev))
+		return;
+
+	/* Fetch the private data before cdev is handed back to the thermal core */
+	pcie_cdev = cdev->devdata;
+
+	thermal_cooling_device_unregister(cdev);
+	kfree(pcie_cdev);
+}
+
+/* For bus_speed <-> state arithmetic */
+static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT);
+static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT);
+static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT);
+static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT);
+static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT);
+
+MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>");
+MODULE_DESCRIPTION("PCIe cooling driver");
diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h
index be51f6f2b340..ec5b1c3515d8 100644
--- a/include/linux/pci-bwctrl.h
+++ b/include/linux/pci-bwctrl.h
@@ -11,7 +11,23 @@ 
 #include <linux/pci.h>
 
 struct pcie_device;
+struct thermal_cooling_device;
 
 int pcie_bwctrl_set_current_speed(struct pcie_device *srv, enum pci_bus_speed speed_req);
 
+#ifdef CONFIG_PCIE_THERMAL
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
+							    struct pcie_device *pdev);
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev);
+#else
+/* Stub for builds without CONFIG_PCIE_THERMAL: no cooling device is created. */
+static inline struct thermal_cooling_device *
+pcie_cooling_device_register(struct pci_dev *port, struct pcie_device *pdev)
+{
+	return NULL;
+}
+/* Stub for builds without CONFIG_PCIE_THERMAL: there is nothing to tear down. */
+static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) { }
+#endif
+
 #endif