diff mbox

cpufreq: Add sfi based cpufreq driver support

Message ID 20141007140734.GB24200@intel-desktop (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Kasagar, Srinidhi Oct. 7, 2014, 2:07 p.m. UTC
From b3037348db47f0629316dd0027c7f1a1b28be959 Mon Sep 17 00:00:00 2001
From: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
Date: Fri, 12 Sep 2014 22:52:37 +0530
Subject: [PATCH] cpufreq: Add sfi based cpufreq driver support

This adds the sfi based cpu freq driver for some of the
Intel's Silver Mont based atom architectures like
Merrifield and Moorfield (intel-mid)

Signed-off-by: Rudramuni, Vishwesh M <vishwesh.m.rudramuni@intel.com>
Signed-off-by: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
---
 drivers/cpufreq/Kconfig.x86   |   10 +
 drivers/cpufreq/Makefile      |    1 +
 drivers/cpufreq/sfi-cpufreq.c |  426 +++++++++++++++++++++++++++++++++++++++++
 drivers/cpufreq/sfi-cpufreq.h |   53 +++++
 4 files changed, 490 insertions(+)
 create mode 100644 drivers/cpufreq/sfi-cpufreq.c
 create mode 100644 drivers/cpufreq/sfi-cpufreq.h

Comments

Viresh Kumar Oct. 7, 2014, 10:22 a.m. UTC | #1
Adding Dirk as well..

On 7 October 2014 19:37, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> From b3037348db47f0629316dd0027c7f1a1b28be959 Mon Sep 17 00:00:00 2001
> From: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
> Date: Fri, 12 Sep 2014 22:52:37 +0530
> Subject: [PATCH] cpufreq: Add sfi based cpufreq driver support

You didn't use git send-email ?

> This adds the sfi based cpu freq driver for some of the
> Intel's Silver Mont based atom architectures like
> Merrifield and Moorfield (intel-mid)
>
> Signed-off-by: Rudramuni, Vishwesh M <vishwesh.m.rudramuni@intel.com>
> Signed-off-by: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
> ---
>  drivers/cpufreq/Kconfig.x86   |   10 +
>  drivers/cpufreq/Makefile      |    1 +
>  drivers/cpufreq/sfi-cpufreq.c |  426 +++++++++++++++++++++++++++++++++++++++++
>  drivers/cpufreq/sfi-cpufreq.h |   53 +++++
>  4 files changed, 490 insertions(+)
>  create mode 100644 drivers/cpufreq/sfi-cpufreq.c
>  create mode 100644 drivers/cpufreq/sfi-cpufreq.h
>
> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
> index 89ae88f91895..3d6886b558fa 100644
> --- a/drivers/cpufreq/Kconfig.x86
> +++ b/drivers/cpufreq/Kconfig.x86
> @@ -57,6 +57,16 @@ config X86_ACPI_CPUFREQ_CPB
>           By enabling this option the acpi_cpufreq driver provides the old
>           entry in addition to the new boost ones, for compatibility reasons.
>
> +config X86_SFI_CPUFREQ
> +       tristate "SFI Processor P-States driver"
> +       depends on X86_INTEL_MID && SFI
> +       help
> +         This driver adds a CPUFreq driver which utilizes the SFI

s/This driver/This/

> +         Processor Performance States enumeration on some Silver Mont
> +         based Intel Atom architecture (like intel-mid)

Looks like the sentence isn't complete here, would you like to?

> +
> +         If in doubt, say N.
> +
>  config ELAN_CPUFREQ
>         tristate "AMD Elan SC400 and SC410"
>         depends on MELAN
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index db6d9a2fea4d..c3b51efd4a85 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_P4_CLOCKMOD)         += p4-clockmod.o
>  obj-$(CONFIG_X86_CPUFREQ_NFORCE2)      += cpufreq-nforce2.o
>  obj-$(CONFIG_X86_INTEL_PSTATE)         += intel_pstate.o
>  obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
> +obj-$(CONFIG_X86_SFI_CPUFREQ)          += sfi-cpufreq.o
>
>  ##################################################################################
>  # ARM SoC drivers
> diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
> new file mode 100644
> index 000000000000..1400e0387528
> --- /dev/null
> +++ b/drivers/cpufreq/sfi-cpufreq.c
> @@ -0,0 +1,426 @@
> +/*
> + *  sfi_cpufreq.c - sfi Processor P-States Driver

No need of mentioning file name here.

> + *  Based on ACPI Processor P-States Driver
> + *

How much different is it from that ? Sorry I haven't checked :(

I mean, will it be possible to make changes to acpi-cpufreq driver or
get the common part out?

> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License.
> + *
> + *  This program is distributed in the hope that it will be useful, but
> + *  WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + *  General Public License for more details.
> + *
> + *  Author: Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com>
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/smp.h>
> +#include <linux/cpufreq.h>
> +#include <linux/slab.h>
> +#include <linux/sfi.h>

Keep them in ascending order please.

> +#include <asm/msr.h>
> +#include <asm/processor.h>
> +
> +#include "sfi-cpufreq.h"

Do you plan to share the definitions here with any other file? If no, please
merge the .h file here only.

> +#define SFI_FREQ_MAX           32
> +#define INTEL_MSR_RANGE                0xffff
> +#define INTEL_MSR_BUSRATIO_MASK        0xff00
> +#define SFI_CPU_MAX            8

Aligning the values in a single column is preferred for readability.

> +
> +DEFINE_PER_CPU(struct sfi_processor *, sfi_processors);
> +
> +static DEFINE_MUTEX(performance_mutex);
> +static DEFINE_PER_CPU(struct sfi_cpufreq_data *, drv_data);
> +struct sfi_freq_table_entry sfi_cpufreq_array[SFI_FREQ_MAX];
> +static int sfi_cpufreq_num;
> +
> +/* sfi_perf_data is a pointer to percpu data. */
> +static struct sfi_processor_performance *sfi_perf_data;
> +static struct cpufreq_driver sfi_cpufreq_driver;
> +
> +struct sfi_cpufreq_data {
> +       struct sfi_processor_performance *sfi_data;
> +       struct cpufreq_frequency_table *freq_table;
> +       unsigned int resume;
> +};
> +
> +static int parse_freq(struct sfi_table_header *table)
> +{
> +       struct sfi_table_simple *sb;
> +       struct sfi_freq_table_entry *pentry;
> +       int total_len;
> +
> +       sb = (struct sfi_table_simple *)table;
> +       if (!sb) {
> +               printk(KERN_WARNING "SFI: Unable to map FREQ\n");
> +               return -ENODEV;
> +       }
> +
> +       if (!sfi_cpufreq_num) {
> +               sfi_cpufreq_num = SFI_GET_NUM_ENTRIES(sb,
> +                        struct sfi_freq_table_entry);
> +               pentry = (struct sfi_freq_table_entry *)sb->pentry;
> +               total_len = sfi_cpufreq_num * sizeof(*pentry);
> +               memcpy(sfi_cpufreq_array, pentry, total_len);
> +       }
> +
> +       return 0;
> +}
> +
> +static int sfi_processor_get_performance_states(struct sfi_processor *pr)
> +{
> +       int result = 0;
> +       int i;
> +
> +       pr->performance->state_count = sfi_cpufreq_num;
> +       pr->performance->states =
> +           kmalloc(sizeof(struct sfi_processor_px) * sfi_cpufreq_num,
> +                   GFP_KERNEL);
> +       if (!pr->performance->states)
> +               result = -ENOMEM;
> +
> +       printk(KERN_INFO "Num p-states %d\n", sfi_cpufreq_num);
> +
> +       /* Populate the P-states info from the SFI table here */
> +       for (i = 0; i < sfi_cpufreq_num; i++) {
> +               pr->performance->states[i].core_frequency =
> +                       sfi_cpufreq_array[i].freq_mhz;
> +               pr->performance->states[i].transition_latency =
> +                       sfi_cpufreq_array[i].latency;
> +               pr->performance->states[i].control =
> +                       sfi_cpufreq_array[i].ctrl_val;
> +               printk(KERN_INFO "State [%d]: core_frequency[%d] transition_latency[%d] control[0x%x]\n",
> +                       i,
> +                       (u32) pr->performance->states[i].core_frequency,
> +                       (u32) pr->performance->states[i].transition_latency,
> +                       (u32) pr->performance->states[i].control);
> +       }
> +
> +       return result;
> +}
> +
> +static int sfi_processor_register_performance(struct sfi_processor_performance
> +                                   *performance, unsigned int cpu)
> +{
> +       struct sfi_processor *pr;
> +
> +       mutex_lock(&performance_mutex);
> +
> +       pr = per_cpu(sfi_processors, cpu);
> +       if (!pr) {
> +               mutex_unlock(&performance_mutex);
> +               return -ENODEV;
> +       }
> +
> +       if (pr->performance) {
> +               mutex_unlock(&performance_mutex);
> +               return -EBUSY;
> +       }
> +
> +       WARN_ON(!performance);
> +
> +       pr->performance = performance;
> +
> +       /* parse the freq table from sfi */
> +       sfi_cpufreq_num = 0;
> +       sfi_table_parse(SFI_SIG_FREQ, NULL, NULL, parse_freq);
> +
> +       sfi_processor_get_performance_states(pr);
> +
> +       mutex_unlock(&performance_mutex);
> +       return 0;
> +}
> +
> +void sfi_processor_unregister_performance(struct sfi_processor_performance
> +                                     *performance, unsigned int cpu)
> +{
> +       struct sfi_processor *pr;
> +
> +       mutex_lock(&performance_mutex);
> +
> +       pr = per_cpu(sfi_processors, cpu);
> +       if (!pr) {
> +               mutex_unlock(&performance_mutex);
> +               return;
> +       }
> +
> +       if (pr->performance)
> +               kfree(pr->performance->states);
> +       pr->performance = NULL;
> +
> +       mutex_unlock(&performance_mutex);
> +}
> +
> +static unsigned extract_freq(u32 msr, struct sfi_cpufreq_data *data)
> +{
> +       struct cpufreq_frequency_table *pos;
> +       struct sfi_processor_performance *perf;
> +       u32 sfi_ctrl;
> +
> +       msr &= INTEL_MSR_BUSRATIO_MASK;
> +       perf = data->sfi_data;
> +
> +       cpufreq_for_each_entry(pos, data->freq_table) {
> +               sfi_ctrl = perf->states[pos->driver_data].control
> +                       & INTEL_MSR_BUSRATIO_MASK;
> +               if (sfi_ctrl == msr)
> +                       return pos->frequency;
> +       }
> +       return data->freq_table[0].frequency;
> +}
> +
> +static u32 get_cur_val(const struct cpumask *mask)
> +{
> +       u32 val, dummy;
> +
> +       if (unlikely(cpumask_empty(mask)))
> +               return 0;
> +
> +       rdmsr_on_cpu(cpumask_any(mask), MSR_IA32_PERF_STATUS, &val, &dummy);
> +
> +       return val;
> +}
> +
> +static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
> +{
> +       struct sfi_cpufreq_data *data = per_cpu(drv_data, cpu);
> +       unsigned int freq;
> +       unsigned int cached_freq;
> +
> +       pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
> +
> +       if (unlikely(data == NULL ||
> +               data->sfi_data == NULL || data->freq_table == NULL)) {
> +               return 0;
> +       }
> +
> +       cached_freq = data->freq_table[data->sfi_data->state].frequency;
> +       freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
> +       if (freq != cached_freq)
> +                /* Force the frequency on next target call */
> +               data->resume = 1;
> +
> +       pr_debug("cur freq = %u\n", freq);
> +
> +       return freq;
> +}
> +
> +static int sfi_cpufreq_target(struct cpufreq_policy *policy,
> +                               unsigned int index)
> +{
> +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> +       struct sfi_processor_performance *perf;
> +       unsigned int next_perf_state = 0; /* Index into perf table */
> +       u32 lo, hi;

You meant unsigned int only?

> +
> +       if (unlikely(data == NULL ||
> +            data->sfi_data == NULL || data->freq_table == NULL)) {
> +               return -ENODEV;
> +       }

Probably this looks better:
       if (unlikely(!data || !data->sfi_data || !data->freq_table))
               return -ENODEV;

But do we need this check at all ? Also same comments for similar usage
in above routines.

> +
> +       perf = data->sfi_data;
> +       next_perf_state = data->freq_table[index].driver_data;
> +       if (perf->state == next_perf_state) {

Can this happen? Probably cpufreq core will never do it ?

> +               if (unlikely(data->resume)) {
> +                       pr_debug("Called after resume, resetting to P%d\n",
> +                               next_perf_state);
> +                       data->resume = 0;
> +               } else {
> +                       pr_debug("Already at target state (P%d)\n",
> +                               next_perf_state);
> +                       return 0;
> +               }
> +       }
> +
> +       rdmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, &lo, &hi);
> +       lo = (lo & ~INTEL_MSR_RANGE) |
> +               ((u32) perf->states[next_perf_state].control & INTEL_MSR_RANGE);
> +       wrmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, lo, hi);
> +
> +       perf->state = next_perf_state;
> +
> +       return 0;
> +}
> +
> +static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +       unsigned int i;
> +       unsigned int valid_states = 0;
> +       unsigned int cpu = policy->cpu;
> +       struct sfi_cpufreq_data *data;
> +       unsigned int result = 0;

Can merge all definitions of similar data type in a single line.

> +       struct sfi_processor_performance *perf;
> +       struct sfi_processor *pr;
> +
> +       pr = kzalloc(sizeof(struct sfi_processor), GFP_KERNEL);

sizeof(*pr)

> +       if (!pr)
> +               return -ENOMEM;
> +
> +       per_cpu(sfi_processors, cpu) = pr;
> +
> +       pr_debug("sfi_cpufreq_cpu_init CPU:%d\n", policy->cpu);

I would prefer
pr_debug("%s: CPU:%d\n", __func__, policy->cpu);

> +
> +       data = kzalloc(sizeof(struct sfi_cpufreq_data), GFP_KERNEL);

sizeof(*data)

> +       if (!data)
> +               return -ENOMEM;

What about freeing pr ?

> +
> +       data->sfi_data = per_cpu_ptr(sfi_perf_data, cpu);
> +       per_cpu(drv_data, cpu) = data;
> +
> +       sfi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

Why here? and not in the structure definition itself..

> +
> +       result = sfi_processor_register_performance(data->sfi_data, cpu);
> +       if (result)
> +               goto err_free;
> +
> +       perf = data->sfi_data;
> +       policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
> +
> +       cpumask_set_cpu(policy->cpu, policy->cpus);
> +       cpumask_set_cpu(policy->cpu, policy->related_cpus);

You don't have to set related_cpus, its done by core. Also policy->cpus
is already set by core to policy->cpu.

> +
> +       /* capability check */
> +       if (perf->state_count <= 1) {
> +               pr_debug("No P-States\n");
> +               result = -ENODEV;
> +               goto err_unreg;
> +       }
> +
> +       data->freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *

sizeof (*data->freq_table)

Also, you want to do kzalloc? or kmalloc will work as well ?

> +                   (perf->state_count+1), GFP_KERNEL);
> +       if (!data->freq_table) {
> +               result = -ENOMEM;
> +               goto err_unreg;
> +       }
> +
> +       /* detect transition latency */
> +       policy->cpuinfo.transition_latency = 0;
> +       for (i = 0; i < perf->state_count; i++) {
> +               if ((perf->states[i].transition_latency * 1000) >
> +                   policy->cpuinfo.transition_latency)
> +                       policy->cpuinfo.transition_latency =
> +                           perf->states[i].transition_latency * 1000;
> +       }
> +
> +       /* initialize the freq table */
> +       for (i = 0; i < perf->state_count; i++) {
> +               if (i > 0 && perf->states[i].core_frequency >=
> +                   data->freq_table[valid_states-1].frequency / 1000)
> +                       continue;

What are you doing here ?

> +
> +               data->freq_table[valid_states].driver_data = i;
> +               data->freq_table[valid_states].frequency =
> +                   perf->states[i].core_frequency * 1000;
> +               valid_states++;
> +       }
> +       data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
> +       perf->state = 0;
> +
> +       result = cpufreq_table_validate_and_show(policy, data->freq_table);
> +       if (result)
> +               goto err_freqfree;
> +
> +       policy->cur = get_cur_freq_on_cpu(cpu);

This is done by core and so you need to do it.

> +       pr_debug("CPU%u - SFI performance management activated.\n", cpu);
> +       for (i = 0; i < perf->state_count; i++)
> +               pr_debug("     %cP%d: %d MHz, %d uS\n",
> +                       (i == perf->state ? '*' : ' '), i,
> +                       (u32) perf->states[i].core_frequency,
> +                       (u32) perf->states[i].transition_latency);

What about printing this in the above for loop only?

> +
> +       data->resume = 1;
> +
> +       return result;
> +
> +err_freqfree:
> +       kfree(data->freq_table);
> +err_unreg:
> +       sfi_processor_unregister_performance(perf, cpu);
> +err_free:
> +       kfree(data);
> +       per_cpu(drv_data, cpu) = NULL;

Free pr ?

> +
> +       return result;
> +}
> +
> +static int sfi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> +{
> +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> +       struct sfi_processor *pr = per_cpu(sfi_processors, policy->cpu);
> +
> +       pr_debug("sfi_cpufreq_cpu_exit\n");
> +
> +       if (data) {

Why will data be NULL here ?

> +               per_cpu(drv_data, policy->cpu) = NULL;
> +               sfi_processor_unregister_performance(data->sfi_data,
> +                                               policy->cpu);
> +               kfree(data->freq_table);
> +               kfree(data);
> +               kfree(pr);
> +       }
> +
> +       return 0;
> +}
> +
> +static int sfi_cpufreq_resume(struct cpufreq_policy *policy)
> +{
> +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);

Your code is dependent on policy->cpu at multiple places and it can change on
CPU hotplug. You should worry about it only if there can be more than one CPU
in a single policy, which doesn't look like the case.

Also there is a patch from Thomas Petazzoni
[PATCHv2 1/4] cpufreq: allow driver-specific data
https://www.mail-archive.com/linux-arm-msm@vger.kernel.org/msg10696.html

you can use that for data.

> +
> +       pr_debug("sfi_cpufreq_resume\n");
> +
> +       data->resume = 1;
> +
> +       return 0;
> +}
> +
> +static struct freq_attr *sfi_cpufreq_attr[] = {
> +       &cpufreq_freq_attr_scaling_available_freqs,
> +       NULL,
> +};

Can reuse cpufreq_generic_attr ??

> +
> +static struct cpufreq_driver sfi_cpufreq_driver = {
> +       .get = get_cur_freq_on_cpu,
> +       .verify = cpufreq_generic_frequency_table_verify,
> +       .target_index = sfi_cpufreq_target,
> +       .init = sfi_cpufreq_cpu_init,
> +       .exit = sfi_cpufreq_cpu_exit,
> +       .resume = sfi_cpufreq_resume,
> +       .name = "sfi-cpufreq",
> +       .attr = sfi_cpufreq_attr,
> +};
> +
> +static int __init sfi_cpufreq_init(void)
> +{
> +       sfi_perf_data = alloc_percpu(struct sfi_processor_performance);
> +       if (!sfi_perf_data) {
> +               pr_debug("Memory allocation error for sfi_perf_data.\n");

Shouldn't this be pr_err ??

> +               return -ENOMEM;
> +       }
> +
> +       return cpufreq_register_driver(&sfi_cpufreq_driver);
> +}
> +
> +static void __exit sfi_cpufreq_exit(void)
> +{
> +       struct sfi_processor *pr;
> +
> +       pr = per_cpu(sfi_processors, 0);
> +       kfree(pr);

What about: kfree(per_cpu(sfi_processors, 0)); instead of above 4 lines.
Also why here when its already done in sfi_cpufreq_cpu_exit ?

> +
> +       cpufreq_unregister_driver(&sfi_cpufreq_driver);
> +
> +       free_percpu(sfi_perf_data);
> +}
> +late_initcall(sfi_cpufreq_init);
> +module_exit(sfi_cpufreq_exit);

Normally we add them just below the respective routines.

> +
> +MODULE_ALIAS("sfi");
> +MODULE_AUTHOR("Vishwesh Rudramuni");

Can add email address as well..

> +MODULE_DESCRIPTION("SFI Processor P-States Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/cpufreq/sfi-cpufreq.h b/drivers/cpufreq/sfi-cpufreq.h
> new file mode 100644
> index 000000000000..b24f958cdaa8
> --- /dev/null
> +++ b/drivers/cpufreq/sfi-cpufreq.h
> @@ -0,0 +1,53 @@
> +/*
> + * Copyright (c) 2010, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __SFI_PROCESSOR_H__
> +#define __SFI_PROCESSOR_H__
> +
> +#include <linux/sfi.h>
> +#include <linux/cpuidle.h>
> +
> +struct sfi_processor_power {
> +       struct cpuidle_device dev;
> +       u32 default_state;
> +       int count;
> +       struct cpuidle_state *states;
> +       struct sfi_cstate_table_entry *sfi_cstates;
> +};
> +
> +struct sfi_processor_flags {
> +       u8 valid;
> +       u8 power;
> +};
> +
> +struct sfi_processor {
> +       u32 id;
> +       struct sfi_processor_flags flags;
> +       struct sfi_processor_power power;
> +       struct sfi_processor_performance *performance;
> +};
> +
> +/* Performance management */
> +struct sfi_processor_px {
> +       u32 core_frequency;     /* megahertz */
> +       u32 transition_latency; /* microseconds */
> +       u32 control;    /* control value */
> +};
> +
> +struct sfi_processor_performance {
> +       unsigned int state;
> +       unsigned int state_count;
> +       struct sfi_processor_px *states;
> +};
> +
> +#endif /*__SFI_PROCESSOR_H__*/
> --
> 1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 8, 2014, 7:14 p.m. UTC | #2
The author is on vacation, let me take some of your comments as i'm in the
delivery path..

On Tue, Oct 07, 2014 at 03:52:42PM +0530, Viresh Kumar wrote:
> Adding Dirk as well..
> 
> On 7 October 2014 19:37, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > From b3037348db47f0629316dd0027c7f1a1b28be959 Mon Sep 17 00:00:00 2001
> > From: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
> > Date: Fri, 12 Sep 2014 22:52:37 +0530
> > Subject: [PATCH] cpufreq: Add sfi based cpufreq driver support
> 
> You didn't use git send-email ?

No, looks like it is broken. Will fix it anyway.

> 
> > This adds the sfi based cpu freq driver for some of the
> > Intel's Silver Mont based atom architectures like
> > Merrifield and Moorfield (intel-mid)
> >
> > Signed-off-by: Rudramuni, Vishwesh M <vishwesh.m.rudramuni@intel.com>
> > Signed-off-by: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
> > ---
> >  drivers/cpufreq/Kconfig.x86   |   10 +
> >  drivers/cpufreq/Makefile      |    1 +
> >  drivers/cpufreq/sfi-cpufreq.c |  426 +++++++++++++++++++++++++++++++++++++++++
> >  drivers/cpufreq/sfi-cpufreq.h |   53 +++++
> >  4 files changed, 490 insertions(+)
> >  create mode 100644 drivers/cpufreq/sfi-cpufreq.c
> >  create mode 100644 drivers/cpufreq/sfi-cpufreq.h
> >
> > diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
> > index 89ae88f91895..3d6886b558fa 100644
> > --- a/drivers/cpufreq/Kconfig.x86
> > +++ b/drivers/cpufreq/Kconfig.x86
> > @@ -57,6 +57,16 @@ config X86_ACPI_CPUFREQ_CPB
> >           By enabling this option the acpi_cpufreq driver provides the old
> >           entry in addition to the new boost ones, for compatibility reasons.
> >
> > +config X86_SFI_CPUFREQ
> > +       tristate "SFI Processor P-States driver"
> > +       depends on X86_INTEL_MID && SFI
> > +       help
> > +         This driver adds a CPUFreq driver which utilizes the SFI
> 
> s/This driver/This/

Thanks, will fix.

> 
> > +         Processor Performance States enumeration on some Silver Mont
> > +         based Intel Atom architecture (like intel-mid)
> 
> Looks like the sentence isn't complete here, would you like to?

This is good enough..let me rephrase it.


> 
> > +
> > +         If in doubt, say N.
> > +
> >  config ELAN_CPUFREQ
> >         tristate "AMD Elan SC400 and SC410"
> >         depends on MELAN
> > diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> > index db6d9a2fea4d..c3b51efd4a85 100644
> > --- a/drivers/cpufreq/Makefile
> > +++ b/drivers/cpufreq/Makefile
> > @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_P4_CLOCKMOD)         += p4-clockmod.o
> >  obj-$(CONFIG_X86_CPUFREQ_NFORCE2)      += cpufreq-nforce2.o
> >  obj-$(CONFIG_X86_INTEL_PSTATE)         += intel_pstate.o
> >  obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
> > +obj-$(CONFIG_X86_SFI_CPUFREQ)          += sfi-cpufreq.o
> >
> >  ##################################################################################
> >  # ARM SoC drivers
> > diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
> > new file mode 100644
> > index 000000000000..1400e0387528
> > --- /dev/null
> > +++ b/drivers/cpufreq/sfi-cpufreq.c
> > @@ -0,0 +1,426 @@
> > +/*
> > + *  sfi_cpufreq.c - sfi Processor P-States Driver
> 
> No need of mentioning file name here.

Thanks for spotting..these are all left over stuffs from the original code.
Let me clean up further..

> 
> > + *  Based on ACPI Processor P-States Driver
> > + *
> 
> How much different is it from that ? Sorry I haven't checked :(
> 
> I mean, will it be possible to make changes to acpi-cpufreq driver or
> get the common part out?

I dont think so. That is ACPI, this one is SFI. Both are architecturally not
aligned.

> 
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; either version 2 of the License.
> > + *
> > + *  This program is distributed in the hope that it will be useful, but
> > + *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + *  General Public License for more details.
> > + *
> > + *  Author: Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com>
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/init.h>
> > +#include <linux/smp.h>
> > +#include <linux/cpufreq.h>
> > +#include <linux/slab.h>
> > +#include <linux/sfi.h>
> 
> Keep them in ascending order please.

Hm..Will fix.

> 
> > +#include <asm/msr.h>
> > +#include <asm/processor.h>
> > +
> > +#include "sfi-cpufreq.h"
> 
> Do you plan to share the definitions here with any other file? If no, please
> merge the .h file here only.

No, will fix it.

> 
> > +#define SFI_FREQ_MAX           32
> > +#define INTEL_MSR_RANGE                0xffff
> > +#define INTEL_MSR_BUSRATIO_MASK        0xff00
> > +#define SFI_CPU_MAX            8
> 
> Aligning the values in a single column is preferred for readability.

I did, not sure how it got realigned. Let me take a look.


[...]

> > +}
> > +
> > +static int sfi_cpufreq_target(struct cpufreq_policy *policy,
> > +                               unsigned int index)
> > +{
> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> > +       struct sfi_processor_performance *perf;
> > +       unsigned int next_perf_state = 0; /* Index into perf table */
> > +       u32 lo, hi;
> 
> You meant unsigned int only?

Yes.

> 
> > +
> > +       if (unlikely(data == NULL ||
> > +            data->sfi_data == NULL || data->freq_table == NULL)) {
> > +               return -ENODEV;
> > +       }
> 
> Probably this looks better:
>        if (unlikely(!data || !data->sfi_data || !data->freq_table))
>                return -ENODEV;

Why not?

> 
> But do we need this check at all ? Also same comments for similar usage
> in above routines.

Not really..Will remove these stuffs. 

> 
> > +
> > +       perf = data->sfi_data;
> > +       next_perf_state = data->freq_table[index].driver_data;
> > +       if (perf->state == next_perf_state) {
> 
> Can this happen? Probably cpufreq core will never do it ?

I think it did. Let me keep this.

> 
> > +               if (unlikely(data->resume)) {
> > +                       pr_debug("Called after resume, resetting to P%d\n",
> > +                               next_perf_state);
> > +                       data->resume = 0;
> > +               } else {
> > +                       pr_debug("Already at target state (P%d)\n",
> > +                               next_perf_state);
> > +                       return 0;
> > +               }
> > +       }
> > +
> > +       rdmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, &lo, &hi);
> > +       lo = (lo & ~INTEL_MSR_RANGE) |
> > +               ((u32) perf->states[next_perf_state].control & INTEL_MSR_RANGE);
> > +       wrmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, lo, hi);
> > +
> > +       perf->state = next_perf_state;
> > +
> > +       return 0;
> > +}
> > +
> > +static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> > +{
> > +       unsigned int i;
> > +       unsigned int valid_states = 0;
> > +       unsigned int cpu = policy->cpu;
> > +       struct sfi_cpufreq_data *data;
> > +       unsigned int result = 0;
> 
> Can merge all definitions of similar data type in a single line.

Why not?

> 
> > +       struct sfi_processor_performance *perf;
> > +       struct sfi_processor *pr;
> > +
> > +       pr = kzalloc(sizeof(struct sfi_processor), GFP_KERNEL);
> 
> sizeof(*pr)

Style issues. Will fix.

> 
> > +       if (!pr)
> > +               return -ENOMEM;
> > +
> > +       per_cpu(sfi_processors, cpu) = pr;
> > +
> > +       pr_debug("sfi_cpufreq_cpu_init CPU:%d\n", policy->cpu);
> 
> I would prefer
> pr_debug("%s: CPU:%d\n", __func__, policy->cpu);

Me too. Will fix.

> 
> > +
> > +       data = kzalloc(sizeof(struct sfi_cpufreq_data), GFP_KERNEL);
> 
> sizeof(*data)

ditto..

> 
> > +       if (!data)
> > +               return -ENOMEM;
> 
> What about freeing pr ?

My bad. Will fix.

> 
> > +
> > +       data->sfi_data = per_cpu_ptr(sfi_perf_data, cpu);
> > +       per_cpu(drv_data, cpu) = data;
> > +
> > +       sfi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
> 
> Why here? and not in the structure definition itself..

Hmm..another legacy style. Will add it in the definition rather..Thanks. 

> 
> > +
> > +       result = sfi_processor_register_performance(data->sfi_data, cpu);
> > +       if (result)
> > +               goto err_free;
> > +
> > +       perf = data->sfi_data;
> > +       policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
> > +
> > +       cpumask_set_cpu(policy->cpu, policy->cpus);
> > +       cpumask_set_cpu(policy->cpu, policy->related_cpus);
> 
> You don't have to set related_cpus, its done by core. Also policy->cpus
> is already set by core to policy->cpu.

Yes, Will fix.

> 
> > +
> > +       /* capability check */
> > +       if (perf->state_count <= 1) {
> > +               pr_debug("No P-States\n");
> > +               result = -ENODEV;
> > +               goto err_unreg;
> > +       }
> > +
> > +       data->freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
> 
> sizeof (*data->freq_table)

ok..

> 
> Also, you want to do kzalloc? or kmalloc will work as well ?

kmalloc makes my table corrupted, perhaps the addition of new flag variable.
Let me keep kzalloc.

> 
> > +                   (perf->state_count+1), GFP_KERNEL);
> > +       if (!data->freq_table) {
> > +               result = -ENOMEM;
> > +               goto err_unreg;
> > +       }
> > +
> > +       /* detect transition latency */
> > +       policy->cpuinfo.transition_latency = 0;
> > +       for (i = 0; i < perf->state_count; i++) {
> > +               if ((perf->states[i].transition_latency * 1000) >
> > +                   policy->cpuinfo.transition_latency)
> > +                       policy->cpuinfo.transition_latency =
> > +                           perf->states[i].transition_latency * 1000;
> > +       }
> > +
> > +       /* initialize the freq table */
> > +       for (i = 0; i < perf->state_count; i++) {
> > +               if (i > 0 && perf->states[i].core_frequency >=
> > +                   data->freq_table[valid_states-1].frequency / 1000)
> > +                       continue;
> 
> What are you doing here ?

This is unncessary check, not needed. Again a left over stuff. Will clean it.

> 
> > +
> > +               data->freq_table[valid_states].driver_data = i;
> > +               data->freq_table[valid_states].frequency =
> > +                   perf->states[i].core_frequency * 1000;
> > +               valid_states++;
> > +       }
> > +       data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
> > +       perf->state = 0;
> > +
> > +       result = cpufreq_table_validate_and_show(policy, data->freq_table);
> > +       if (result)
> > +               goto err_freqfree;
> > +
> > +       policy->cur = get_cur_freq_on_cpu(cpu);
> 
> This is done by core and so you need to do it.

Agree. Will remove.


> 
> > +       pr_debug("CPU%u - SFI performance management activated.\n", cpu);
> > +       for (i = 0; i < perf->state_count; i++)
> > +               pr_debug("     %cP%d: %d MHz, %d uS\n",
> > +                       (i == perf->state ? '*' : ' '), i,
> > +                       (u32) perf->states[i].core_frequency,
> > +                       (u32) perf->states[i].transition_latency);
> 
> What about printing this in the above for loop only?

Why not?

> 
> > +
> > +       data->resume = 1;
> > +
> > +       return result;
> > +
> > +err_freqfree:
> > +       kfree(data->freq_table);
> > +err_unreg:
> > +       sfi_processor_unregister_performance(perf, cpu);
> > +err_free:
> > +       kfree(data);
> > +       per_cpu(drv_data, cpu) = NULL;
> 
> Free pr ?

My bad. Wilf fix

> 
> > +
> > +       return result;
> > +}
> > +
> > +static int sfi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> > +{
> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> > +       struct sfi_processor *pr = per_cpu(sfi_processors, policy->cpu);
> > +
> > +       pr_debug("sfi_cpufreq_cpu_exit\n");
> > +
> > +       if (data) {
> 
> Why will data be NULL here ?

Perhaps to satisfy the static code check tools. Will remove.

> 
> > +               per_cpu(drv_data, policy->cpu) = NULL;
> > +               sfi_processor_unregister_performance(data->sfi_data,
> > +                                               policy->cpu);
> > +               kfree(data->freq_table);
> > +               kfree(data);
> > +               kfree(pr);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static int sfi_cpufreq_resume(struct cpufreq_policy *policy)
> > +{
> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> 
> Your code is dependent on policy->cpu at multiple places and it can change on
> CPU hotplug. You should worry about it only if there can be more than one CPU
> in a single policy, which doesn't look like the case.

Not sure what do you mean by not using policy->cpu. I need all of these
to support hotplug. Can you elaborate if you mean otherwise please?

> 
> Also there is a patch from Thomas Petazzoni
> [PATCHv2 1/4] cpufreq: allow driver-specific data
> https://www.mail-archive.com/linux-arm-msm@vger.kernel.org/msg10696.html
> 
> you can use that for data.
> 
> > +
> > +       pr_debug("sfi_cpufreq_resume\n");
> > +
> > +       data->resume = 1;
> > +
> > +       return 0;
> > +}
> > +
> > +static struct freq_attr *sfi_cpufreq_attr[] = {
> > +       &cpufreq_freq_attr_scaling_available_freqs,
> > +       NULL,
> > +};
> 
> Can reuse cpufreq_generic_attr ??

We should be using that. Will fix.

> 
> > +
> > +static struct cpufreq_driver sfi_cpufreq_driver = {
> > +       .get = get_cur_freq_on_cpu,
> > +       .verify = cpufreq_generic_frequency_table_verify,
> > +       .target_index = sfi_cpufreq_target,
> > +       .init = sfi_cpufreq_cpu_init,
> > +       .exit = sfi_cpufreq_cpu_exit,
> > +       .resume = sfi_cpufreq_resume,
> > +       .name = "sfi-cpufreq",
> > +       .attr = sfi_cpufreq_attr,
> > +};
> > +
> > +static int __init sfi_cpufreq_init(void)
> > +{
> > +       sfi_perf_data = alloc_percpu(struct sfi_processor_performance);
> > +       if (!sfi_perf_data) {
> > +               pr_debug("Memory allocation error for sfi_perf_data.\n");
> 
> Shouldn't this be pr_err ??

Yes. Will fix.

> 
> > +               return -ENOMEM;
> > +       }
> > +
> > +       return cpufreq_register_driver(&sfi_cpufreq_driver);
> > +}
> > +
> > +static void __exit sfi_cpufreq_exit(void)
> > +{
> > +       struct sfi_processor *pr;
> > +
> > +       pr = per_cpu(sfi_processors, 0);
> > +       kfree(pr);
> 
> What about: kfree(per_cpu(sfi_processors, 0)); instead of above 4 lines.
> Also why here when its already done in sfi_cpufreq_cpu_exit ?

I think it is not required. Will remove them.

> 
> > +
> > +       cpufreq_unregister_driver(&sfi_cpufreq_driver);
> > +
> > +       free_percpu(sfi_perf_data);
> > +}
> > +late_initcall(sfi_cpufreq_init);
> > +module_exit(sfi_cpufreq_exit);
> 
> Normally we add them just below the respective routines.

Some prefer like this :)

> 
> > +
> > +MODULE_ALIAS("sfi");
> > +MODULE_AUTHOR("Vishwesh Rudramuni");
> 
> Can add email address as well..

Yes..Will add.

Srinidhi
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 9, 2014, 4:03 a.m. UTC | #3
On 9 October 2014 00:44, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> On Tue, Oct 07, 2014 at 03:52:42PM +0530, Viresh Kumar wrote:
>> > +static int sfi_cpufreq_target(struct cpufreq_policy *policy,
>> > +                               unsigned int index)
>> > +{
>> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
>> > +       struct sfi_processor_performance *perf;
>> > +       unsigned int next_perf_state = 0; /* Index into perf table */
>> > +       u32 lo, hi;
>>
>> You meant unsigned int only?
>
> Yes.

Then use that only to be aligned with other data types.

>> > +       perf = data->sfi_data;
>> > +       next_perf_state = data->freq_table[index].driver_data;
>> > +       if (perf->state == next_perf_state) {
>>
>> Can this happen? Probably cpufreq core will never do it ?
>
> I think it did. Let me keep this.

Okay, but how? The cpufreq core doesn't propagate call to ->target_index()
if the freq isn't changing.. Please check if this is getting hit at all or not.

>> > +static int sfi_cpufreq_resume(struct cpufreq_policy *policy)
>> > +{
>> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
>>
>> Your code is dependent on policy->cpu at multiple places and it can change on
>> CPU hotplug. You should worry about it only if there can be more than one CPU
>> in a single policy, which doesn't look like the case.
>
> Not sure what do you mean by not using policy->cpu. I need all of these
> to support hotplug. Can you elaborate if you mean otherwise please?

So you are using policy->cpu to set/get a value out of a per-cpu variable.
Now if a policy has 4 cpus then per-cpu variable will only be initialized for
policy->cpu and not others.

But now if we do hotplug of that cpu, policy->cpu will move to next cpu.
In this case accessing the per-cpu variable will not be a good idea :)
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 9, 2014, 10:18 a.m. UTC | #4
On 9 October 2014 23:38, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> I guess if you offline a cpu and then bring it online, then even though
> there is no change in frequency, the core calls ->target_index().

No.

> am i missing something?

Yes.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 9, 2014, 11:15 a.m. UTC | #5
On 10 October 2014 00:37, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> Hmm..I noticed later that I was doing cpumask_copy for my internal experiments.
> Even in that case, as you pointed, the ->target_index() was not called
> for the onlined CPU, but for the other related CPU.

But why is it getting called for even that ?
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 9, 2014, 6:08 p.m. UTC | #6
On Thu, Oct 09, 2014 at 09:33:24AM +0530, Viresh Kumar wrote:
> On 9 October 2014 00:44, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > On Tue, Oct 07, 2014 at 03:52:42PM +0530, Viresh Kumar wrote:
> >> > +static int sfi_cpufreq_target(struct cpufreq_policy *policy,
> >> > +                               unsigned int index)
> >> > +{
> >> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> >> > +       struct sfi_processor_performance *perf;
> >> > +       unsigned int next_perf_state = 0; /* Index into perf table */
> >> > +       u32 lo, hi;
> >>
> >> You meant unsigned int only?
> >
> > Yes.
> 
> Then use that only to be aligned with other data types.

Sorry, I meant it should be u32 as they are register reads.

> 
> >> > +       perf = data->sfi_data;
> >> > +       next_perf_state = data->freq_table[index].driver_data;
> >> > +       if (perf->state == next_perf_state) {
> >>
> >> Can this happen? Probably cpufreq core will never do it ?
> >
> > I think it did. Let me keep this.
> 
> Okay, but how? The cpufreq core doesn't propagate call to ->target_index()
> if the freq isn't changing.. Please check if this is getting hit at all or not.

I guess if you offline a cpu and then bring it online, then even though
there is no change in frequency, the core calls ->target_index().

am i missing something?

> 
> >> > +static int sfi_cpufreq_resume(struct cpufreq_policy *policy)
> >> > +{
> >> > +       struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
> >>
> >> Your code is dependent on policy->cpu at multiple places and it can change on
> >> CPU hotplug. You should worry about it only if there can be more than one CPU
> >> in a single policy, which doesn't look like the case.
> >
> > Not sure what do you mean by not using policy->cpu. I need all of these
> > to support hotplug. Can you elaborate if you mean otherwise please?
> 
> So you are using policy->cpu to set/get a value out of a per-cpu variable.
> Now if a policy has 4 cpus then per-cpu variable will only be initialized for
> policy->cpu and not others.
> 
> But now if we do hotplug of that cpu, policy->cpu will move to next cpu.
> In this case accessing the per-cpu variable will not be a good idea :)

Not sure what I can do with this. Let me have a look.

Srinidhi
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 9, 2014, 7:07 p.m. UTC | #7
On Thu, Oct 09, 2014 at 03:48:08PM +0530, Viresh Kumar wrote:
> On 9 October 2014 23:38, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > I guess if you offline a cpu and then bring it online, then even though
> > there is no change in frequency, the core calls ->target_index().
> 
> No.
Hmm..I noticed later that I was doing cpumask_copy for my internal experiments.
Even in that case, as you pointed, the ->target_index() was not called
for the onlined CPU, but for the other related CPU. I should have 
debug printed policy->cpu in my ->target() implementation..:(

Anyway, thanks much for it. Will fix that useless check in the next version
of my patch.

Srinidhi 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 10, 2014, 8:57 a.m. UTC | #8
On 10 October 2014 22:21, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> I have not debugged that. However, to double check the same I have rebased to the
> clean upstream tree and I don't see ->target_index() is called for any
> other CPUs during the offline-online path..

OKay, this is getting a bit confusing.

->target_index() is called for which CPU?
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 10, 2014, 9:27 a.m. UTC | #9
On 10 October 2014 22:44, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> It was called for the sibling cpu of the onlined one.

Still not clear. Can you give me output of this:

cat /sys/devices/system/cpu/cpu*/cpufreq/affected_cpus and
cat /sys/devices/system/cpu/cpu*/cpufreq/related_cpus

And then also tell me which CPU are you trying to remove ?
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 10, 2014, 10:47 a.m. UTC | #10
On Fri, Oct 10, 2014 at 02:57:42PM +0530, Viresh Kumar wrote:
> On 10 October 2014 22:44, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > It was called for the sibling cpu of the onlined one.
> 
> Still not clear. Can you give me output of this:
> 
> cat /sys/devices/system/cpu/cpu*/cpufreq/affected_cpus and
> cat /sys/devices/system/cpu/cpu*/cpufreq/related_cpus

# cat /sys/devices/system/cpu/cpu*/cpufreq/affected_cpus      
0 1
0 1
2 3
2 3

# cat /sys/devices/system/cpu/cpu*/cpufreq/related_cpus
0 1
0 1
2 3
2 3

> 
> And then also tell me which CPU are you trying to remove ?

cpu1

BTW, I had to switch to my own tree to get the issue back (where
I have my legacy custom cpufreq/core changes. So I suspect my changes
not the upstream tree)

Srinidhi
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Oct. 10, 2014, 11:11 a.m. UTC | #11
On 10 October 2014 16:17, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> # cat /sys/devices/system/cpu/cpu*/cpufreq/affected_cpus
> 0 1
> 0 1
> 2 3
> 2 3
>
> # cat /sys/devices/system/cpu/cpu*/cpufreq/related_cpus
> 0 1
> 0 1
> 2 3
> 2 3
>
>>
>> And then also tell me which CPU are you trying to remove ?
>
> cpu1

So when to remove CPU1, you get a call for freq change for CPU0. Right ?

There are few things worth noticing here:
- Freq change is only requested for policy->cpu. In your case there will be two
policies present in system. For the first one policy->cpu will be 0,
for the second
one 2.
- So target_index() will always be called for 0 and 2. Unless
policy->cpu is changed
by hotplugging out policy->cpu, i.e. 0 or 2.
-  This whole thread started because you said target_index() is called
for policy->cur
frequency. So even if ->target_index() is called (which might happen
due to governor
requesting a new freq), it shouldn't be called for the same freq.

> BTW, I had to switch to my own tree to get the issue back (where
> I have my legacy custom cpufreq/core changes. So I suspect my changes
> not the upstream tree)

That's another side of the story :)
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 10, 2014, 4:51 p.m. UTC | #12
On Thu, Oct 09, 2014 at 04:45:01PM +0530, Viresh Kumar wrote:
> On 10 October 2014 00:37, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > Hmm..I noticed later that I was doing cpumask_copy for my internal experiments.
> > Even in that case, as you pointed, the ->target_index() was not called
> > for the onlined CPU, but for the other related CPU.
> 
> But why is it getting called for even that ?
I have not debugged that. However, to double check the same I have rebased to the
clean upstream tree and I don't see ->target_index() is called for any
other CPUs during the offline-online path..

Srinidhi
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kasagar, Srinidhi Oct. 10, 2014, 5:14 p.m. UTC | #13
On Fri, Oct 10, 2014 at 02:27:17PM +0530, Viresh Kumar wrote:
> On 10 October 2014 22:21, Kasagar, Srinidhi <srinidhi.kasagar@intel.com> wrote:
> > I have not debugged that. However, to double check the same I have rebased to the
> > clean upstream tree and I don't see ->target_index() is called for any
> > other CPUs during the offline-online path..
> 
> OKay, this is getting a bit confusing.
> 
> ->target_index() is called for which CPU?
It was called for the sibling cpu of the onlined one.

Srinidhi
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 89ae88f91895..3d6886b558fa 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -57,6 +57,16 @@  config X86_ACPI_CPUFREQ_CPB
 	  By enabling this option the acpi_cpufreq driver provides the old
 	  entry in addition to the new boost ones, for compatibility reasons.
 
+config X86_SFI_CPUFREQ
+	tristate "SFI Processor P-States driver"
+	depends on X86_INTEL_MID && SFI
+	help
+	  This driver adds a CPUFreq driver which utilizes the SFI
+	  Processor Performance States enumeration on some Silver Mont
+	  based Intel Atom architecture (like intel-mid)
+
+	  If in doubt, say N.
+
 config ELAN_CPUFREQ
 	tristate "AMD Elan SC400 and SC410"
 	depends on MELAN
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index db6d9a2fea4d..c3b51efd4a85 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -41,6 +41,7 @@  obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
 obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
 obj-$(CONFIG_X86_INTEL_PSTATE)		+= intel_pstate.o
 obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY)	+= amd_freq_sensitivity.o
+obj-$(CONFIG_X86_SFI_CPUFREQ)		+= sfi-cpufreq.o
 
 ##################################################################################
 # ARM SoC drivers
diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
new file mode 100644
index 000000000000..1400e0387528
--- /dev/null
+++ b/drivers/cpufreq/sfi-cpufreq.c
@@ -0,0 +1,426 @@ 
+/*
+ *  sfi_cpufreq.c - sfi Processor P-States Driver
+ *  Based on ACPI Processor P-States Driver
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  Author: Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/sfi.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+#include "sfi-cpufreq.h"
+
+#define SFI_FREQ_MAX		32
+#define INTEL_MSR_RANGE		0xffff
+#define INTEL_MSR_BUSRATIO_MASK	0xff00
+#define SFI_CPU_MAX		8
+
+DEFINE_PER_CPU(struct sfi_processor *, sfi_processors);
+
+static DEFINE_MUTEX(performance_mutex);
+static DEFINE_PER_CPU(struct sfi_cpufreq_data *, drv_data);
+struct sfi_freq_table_entry sfi_cpufreq_array[SFI_FREQ_MAX];
+static int sfi_cpufreq_num;
+
+/* sfi_perf_data is a pointer to percpu data. */
+static struct sfi_processor_performance *sfi_perf_data;
+static struct cpufreq_driver sfi_cpufreq_driver;
+
+struct sfi_cpufreq_data {
+	struct sfi_processor_performance *sfi_data;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int resume;
+};
+
+static int parse_freq(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_freq_table_entry *pentry;
+	int total_len;
+
+	sb = (struct sfi_table_simple *)table;
+	if (!sb) {
+		printk(KERN_WARNING "SFI: Unable to map FREQ\n");
+		return -ENODEV;
+	}
+
+	if (!sfi_cpufreq_num) {
+		sfi_cpufreq_num = SFI_GET_NUM_ENTRIES(sb,
+			 struct sfi_freq_table_entry);
+		pentry = (struct sfi_freq_table_entry *)sb->pentry;
+		total_len = sfi_cpufreq_num * sizeof(*pentry);
+		memcpy(sfi_cpufreq_array, pentry, total_len);
+	}
+
+	return 0;
+}
+
+static int sfi_processor_get_performance_states(struct sfi_processor *pr)
+{
+	int result = 0;
+	int i;
+
+	pr->performance->state_count = sfi_cpufreq_num;
+	pr->performance->states =
+	    kmalloc(sizeof(struct sfi_processor_px) * sfi_cpufreq_num,
+		    GFP_KERNEL);
+	if (!pr->performance->states)
+		result = -ENOMEM;
+
+	printk(KERN_INFO "Num p-states %d\n", sfi_cpufreq_num);
+
+	/* Populate the P-states info from the SFI table here */
+	for (i = 0; i < sfi_cpufreq_num; i++) {
+		pr->performance->states[i].core_frequency =
+			sfi_cpufreq_array[i].freq_mhz;
+		pr->performance->states[i].transition_latency =
+			sfi_cpufreq_array[i].latency;
+		pr->performance->states[i].control =
+			sfi_cpufreq_array[i].ctrl_val;
+		printk(KERN_INFO "State [%d]: core_frequency[%d] transition_latency[%d] control[0x%x]\n",
+			i,
+			(u32) pr->performance->states[i].core_frequency,
+			(u32) pr->performance->states[i].transition_latency,
+			(u32) pr->performance->states[i].control);
+	}
+
+	return result;
+}
+
+static int sfi_processor_register_performance(struct sfi_processor_performance
+				    *performance, unsigned int cpu)
+{
+	struct sfi_processor *pr;
+
+	mutex_lock(&performance_mutex);
+
+	pr = per_cpu(sfi_processors, cpu);
+	if (!pr) {
+		mutex_unlock(&performance_mutex);
+		return -ENODEV;
+	}
+
+	if (pr->performance) {
+		mutex_unlock(&performance_mutex);
+		return -EBUSY;
+	}
+
+	WARN_ON(!performance);
+
+	pr->performance = performance;
+
+	/* parse the freq table from sfi */
+	sfi_cpufreq_num = 0;
+	sfi_table_parse(SFI_SIG_FREQ, NULL, NULL, parse_freq);
+
+	sfi_processor_get_performance_states(pr);
+
+	mutex_unlock(&performance_mutex);
+	return 0;
+}
+
+void sfi_processor_unregister_performance(struct sfi_processor_performance
+				      *performance, unsigned int cpu)
+{
+	struct sfi_processor *pr;
+
+	mutex_lock(&performance_mutex);
+
+	pr = per_cpu(sfi_processors, cpu);
+	if (!pr) {
+		mutex_unlock(&performance_mutex);
+		return;
+	}
+
+	if (pr->performance)
+		kfree(pr->performance->states);
+	pr->performance = NULL;
+
+	mutex_unlock(&performance_mutex);
+}
+
+static unsigned extract_freq(u32 msr, struct sfi_cpufreq_data *data)
+{
+	struct cpufreq_frequency_table *pos;
+	struct sfi_processor_performance *perf;
+	u32 sfi_ctrl;
+
+	msr &= INTEL_MSR_BUSRATIO_MASK;
+	perf = data->sfi_data;
+
+	cpufreq_for_each_entry(pos, data->freq_table) {
+		sfi_ctrl = perf->states[pos->driver_data].control
+			& INTEL_MSR_BUSRATIO_MASK;
+		if (sfi_ctrl == msr)
+			return pos->frequency;
+	}
+	return data->freq_table[0].frequency;
+}
+
+static u32 get_cur_val(const struct cpumask *mask)
+{
+	u32 val, dummy;
+
+	if (unlikely(cpumask_empty(mask)))
+		return 0;
+
+	rdmsr_on_cpu(cpumask_any(mask), MSR_IA32_PERF_STATUS, &val, &dummy);
+
+	return val;
+}
+
+static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
+{
+	struct sfi_cpufreq_data *data = per_cpu(drv_data, cpu);
+	unsigned int freq;
+	unsigned int cached_freq;
+
+	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
+
+	if (unlikely(data == NULL ||
+		data->sfi_data == NULL || data->freq_table == NULL)) {
+		return 0;
+	}
+
+	cached_freq = data->freq_table[data->sfi_data->state].frequency;
+	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
+	if (freq != cached_freq)
+		 /* Force the frequency on next target call */
+		data->resume = 1;
+
+	pr_debug("cur freq = %u\n", freq);
+
+	return freq;
+}
+
+static int sfi_cpufreq_target(struct cpufreq_policy *policy,
+				unsigned int index)
+{
+	struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct sfi_processor_performance *perf;
+	unsigned int next_perf_state = 0; /* Index into perf table */
+	u32 lo, hi;
+
+	if (unlikely(data == NULL ||
+	     data->sfi_data == NULL || data->freq_table == NULL)) {
+		return -ENODEV;
+	}
+
+	perf = data->sfi_data;
+	next_perf_state = data->freq_table[index].driver_data;
+	if (perf->state == next_perf_state) {
+		if (unlikely(data->resume)) {
+			pr_debug("Called after resume, resetting to P%d\n",
+				next_perf_state);
+			data->resume = 0;
+		} else {
+			pr_debug("Already at target state (P%d)\n",
+				next_perf_state);
+			return 0;
+		}
+	}
+
+	rdmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, &lo, &hi);
+	lo = (lo & ~INTEL_MSR_RANGE) |
+		((u32) perf->states[next_perf_state].control & INTEL_MSR_RANGE);
+	wrmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, lo, hi);
+
+	perf->state = next_perf_state;
+
+	return 0;
+}
+
+static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	unsigned int valid_states = 0;
+	unsigned int cpu = policy->cpu;
+	struct sfi_cpufreq_data *data;
+	unsigned int result = 0;
+	struct sfi_processor_performance *perf;
+	struct sfi_processor *pr;
+
+	pr = kzalloc(sizeof(struct sfi_processor), GFP_KERNEL);
+	if (!pr)
+		return -ENOMEM;
+
+	per_cpu(sfi_processors, cpu) = pr;
+
+	pr_debug("sfi_cpufreq_cpu_init CPU:%d\n", policy->cpu);
+
+	data = kzalloc(sizeof(struct sfi_cpufreq_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->sfi_data = per_cpu_ptr(sfi_perf_data, cpu);
+	per_cpu(drv_data, cpu) = data;
+
+	sfi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	result = sfi_processor_register_performance(data->sfi_data, cpu);
+	if (result)
+		goto err_free;
+
+	perf = data->sfi_data;
+	policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
+
+	cpumask_set_cpu(policy->cpu, policy->cpus);
+	cpumask_set_cpu(policy->cpu, policy->related_cpus);
+
+	/* capability check */
+	if (perf->state_count <= 1) {
+		pr_debug("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	data->freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
+		    (perf->state_count+1), GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i = 0; i < perf->state_count; i++) {
+		if ((perf->states[i].transition_latency * 1000) >
+		    policy->cpuinfo.transition_latency)
+			policy->cpuinfo.transition_latency =
+			    perf->states[i].transition_latency * 1000;
+	}
+
+	/* initialize the freq table */
+	for (i = 0; i < perf->state_count; i++) {
+		if (i > 0 && perf->states[i].core_frequency >=
+		    data->freq_table[valid_states-1].frequency / 1000)
+			continue;
+
+		data->freq_table[valid_states].driver_data = i;
+		data->freq_table[valid_states].frequency =
+		    perf->states[i].core_frequency * 1000;
+		valid_states++;
+	}
+	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+	perf->state = 0;
+
+	result = cpufreq_table_validate_and_show(policy, data->freq_table);
+	if (result)
+		goto err_freqfree;
+
+	policy->cur = get_cur_freq_on_cpu(cpu);
+
+	pr_debug("CPU%u - SFI performance management activated.\n", cpu);
+	for (i = 0; i < perf->state_count; i++)
+		pr_debug("     %cP%d: %d MHz, %d uS\n",
+			(i == perf->state ? '*' : ' '), i,
+			(u32) perf->states[i].core_frequency,
+			(u32) perf->states[i].transition_latency);
+
+	data->resume = 1;
+
+	return result;
+
+err_freqfree:
+	kfree(data->freq_table);
+err_unreg:
+	sfi_processor_unregister_performance(perf, cpu);
+err_free:
+	kfree(data);
+	per_cpu(drv_data, cpu) = NULL;
+
+	return result;
+}
+
+static int sfi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct sfi_processor *pr = per_cpu(sfi_processors, policy->cpu);
+
+	pr_debug("sfi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		per_cpu(drv_data, policy->cpu) = NULL;
+		sfi_processor_unregister_performance(data->sfi_data,
+						policy->cpu);
+		kfree(data->freq_table);
+		kfree(data);
+		kfree(pr);
+	}
+
+	return 0;
+}
+
+static int sfi_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	struct sfi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+
+	pr_debug("sfi_cpufreq_resume\n");
+
+	data->resume = 1;
+
+	return 0;
+}
+
+static struct freq_attr *sfi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver sfi_cpufreq_driver = {
+	.get = get_cur_freq_on_cpu,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = sfi_cpufreq_target,
+	.init = sfi_cpufreq_cpu_init,
+	.exit = sfi_cpufreq_cpu_exit,
+	.resume = sfi_cpufreq_resume,
+	.name = "sfi-cpufreq",
+	.attr = sfi_cpufreq_attr,
+};
+
+static int __init sfi_cpufreq_init(void)
+{
+	sfi_perf_data = alloc_percpu(struct sfi_processor_performance);
+	if (!sfi_perf_data) {
+		pr_debug("Memory allocation error for sfi_perf_data.\n");
+		return -ENOMEM;
+	}
+
+	return cpufreq_register_driver(&sfi_cpufreq_driver);
+}
+
+static void __exit sfi_cpufreq_exit(void)
+{
+	struct sfi_processor *pr;
+
+	pr = per_cpu(sfi_processors, 0);
+	kfree(pr);
+
+	cpufreq_unregister_driver(&sfi_cpufreq_driver);
+
+	free_percpu(sfi_perf_data);
+}
+late_initcall(sfi_cpufreq_init);
+module_exit(sfi_cpufreq_exit);
+
+MODULE_ALIAS("sfi");
+MODULE_AUTHOR("Vishwesh Rudramuni");
+MODULE_DESCRIPTION("SFI Processor P-States Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/sfi-cpufreq.h b/drivers/cpufreq/sfi-cpufreq.h
new file mode 100644
index 000000000000..b24f958cdaa8
--- /dev/null
+++ b/drivers/cpufreq/sfi-cpufreq.h
@@ -0,0 +1,53 @@ 
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SFI_PROCESSOR_H__
+#define __SFI_PROCESSOR_H__
+
+#include <linux/sfi.h>
+#include <linux/cpuidle.h>
+
+struct sfi_processor_power {
+	struct cpuidle_device dev;
+	u32 default_state;
+	int count;
+	struct cpuidle_state *states;
+	struct sfi_cstate_table_entry *sfi_cstates;
+};
+
+struct sfi_processor_flags {
+	u8 valid;
+	u8 power;
+};
+
+struct sfi_processor {
+	u32 id;
+	struct sfi_processor_flags flags;
+	struct sfi_processor_power power;
+	struct sfi_processor_performance *performance;
+};
+
+/* Performance management */
+struct sfi_processor_px {
+	u32 core_frequency;	/* megahertz */
+	u32 transition_latency;	/* microseconds */
+	u32 control;	/* control value */
+};
+
+struct sfi_processor_performance {
+	unsigned int state;
+	unsigned int state_count;
+	struct sfi_processor_px *states;
+};
+
+#endif /*__SFI_PROCESSOR_H__*/