@@ -155,6 +155,7 @@ extern bool x86_topology_update;
#include <asm/percpu.h>
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
@@ -167,6 +168,7 @@ void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_ITMT */
+#define sysctl_sched_itmt_enabled 0
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
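
The stub value for the !CONFIG_SCHED_ITMT case lets callers test the knob
unconditionally and have the check folded away at compile time.  As a rough
sketch of such a consumer (the helper name x86_sched_itmt_flags() and its use
of the scheduler's SD_ASYM_PACKING flag are illustrative here, not part of
this hunk):

static inline int x86_sched_itmt_flags(void)
{
	/*
	 * Request asym-packing placement only while the knob is on;
	 * with CONFIG_SCHED_ITMT=n this is a compile-time constant 0.
	 */
	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}
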
@@ -34,6 +34,67 @@ DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;
+/*
+ * Boolean to control whether we want to move processes to CPUs capable
+ * of a higher turbo frequency on systems that support Intel Turbo Boost
+ * Max Technology 3.0.
+ *
+ * It can be set via /proc/sys/kernel/sched_itmt_enabled.
+ */
+unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+static int sched_itmt_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+ unsigned int old_sysctl;
+
+ mutex_lock(&itmt_update_mutex);
+
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return -EINVAL;
+ }
+
+ old_sysctl = sysctl_sched_itmt_enabled;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+
+ return ret;
+}
+
+static unsigned int zero;
+static unsigned int one = 1;
+static struct ctl_table itmt_kern_table[] = {
+ {
+ .procname = "sched_itmt_enabled",
+ .data = &sysctl_sched_itmt_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_itmt_update_handler,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {}
+};
+
+static struct ctl_table itmt_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = itmt_kern_table,
+ },
+ {}
+};
+
+static struct ctl_table_header *itmt_sysctl_header;
+
/**
* sched_set_itmt_support - Indicate platform supports ITMT
*
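
The handler above rejects writes while the platform is not ITMT capable,
limits accepted values to 0 or 1 via the zero/one bounds, and rebuilds the
sched domains only when a write actually changes the setting.  From userspace
the knob is a plain procfs file; a minimal sketch of flipping it (run as
root, equivalent to "echo 1 > /proc/sys/kernel/sched_itmt_enabled"):

#include <stdio.h>

int main(void)
{
	/* The file only exists once sched_set_itmt_support() registered it. */
	FILE *f = fopen("/proc/sys/kernel/sched_itmt_enabled", "w");

	if (!f) {
		perror("sched_itmt_enabled");
		return 1;
	}
	fputs("1\n", f);	/* "0" disables; out-of-range values are rejected */
	if (fclose(f))		/* write errors surface on close */
		perror("sched_itmt_enabled");
	return 0;
}
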
@@ -47,13 +108,40 @@ static bool __read_mostly sched_itmt_capable;
*
* This must be done only after sched_set_itmt_core_prio
* has been called to set the cpus' priorities.
+ *
+ * This function must not be called with the CPU hotplug lock held, as
+ * that lock has to be acquired later when the sched domains are
+ * rebuilt.
*/
int sched_set_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
+ if (sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return 0;
+ }
+
+ itmt_sysctl_header = register_sysctl_table(itmt_root_table);
+ if (!itmt_sysctl_header) {
+ mutex_unlock(&itmt_update_mutex);
+ return -ENOMEM;
+ }
+
sched_itmt_capable = true;
+	/*
+	 * ITMT capability automatically enables ITMT scheduling
+	 * for small, single-package systems.
+	 */
+ if (topology_num_packages() == 1)
+ sysctl_sched_itmt_enabled = 1;
+
+ if (sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
mutex_unlock(&itmt_update_mutex);
return 0;
}
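
For context, the expected calling order is: a platform driver first hands
per-core priorities to the scheduler with sched_set_itmt_core_prio() and only
then announces support, which registers the sysctl and may rebuild the sched
domains.  A minimal sketch, where example_enable_itmt() and
example_core_perf() are hypothetical driver-side names:

static void example_enable_itmt(void)
{
	int cpu;

	/* Priorities first, e.g. each core's highest turbo performance. */
	for_each_online_cpu(cpu)
		sched_set_itmt_core_prio(example_core_perf(cpu), cpu);

	/* Without the CPU hotplug lock held: this may rebuild sched domains. */
	sched_set_itmt_support();
}
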
@@ -64,13 +152,30 @@ int sched_set_itmt_support(void)
* This function is used by the OS to indicate that it has
* revoked the platform's support of ITMT feature.
*
+ *
+ * This function must not be called with the CPU hotplug lock held, as
+ * that lock has to be acquired later when the sched domains are
+ * rebuilt.
*/
void sched_clear_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return;
+ }
sched_itmt_capable = false;
+	if (itmt_sysctl_header) {
+		unregister_sysctl_table(itmt_sysctl_header);
+		itmt_sysctl_header = NULL;
+	}
+
+ if (sysctl_sched_itmt_enabled) {
+ /* disable sched_itmt if we are no longer ITMT capable */
+ sysctl_sched_itmt_enabled = 0;
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
mutex_unlock(&itmt_update_mutex);
}
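
Conversely, a driver that loses its source of core priorities is expected to
revoke support.  A minimal sketch, where example_disable_itmt() is again a
hypothetical driver-side name:

static void example_disable_itmt(void)
{
	/*
	 * Drops the sysctl, clears sysctl_sched_itmt_enabled and, if ITMT
	 * scheduling was enabled, rebuilds the sched domains.  Must not be
	 * called with the CPU hotplug lock held.
	 */
	sched_clear_itmt_support();
}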