[3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters

Message ID 20190925161813.21117-4-l.luba@partner.samsung.com (mailing list archive)
State Superseded
Series [1/3] ARM: dts: exynos: Add interrupt to DMC controller in Exynos5422

Commit Message

Lukasz Luba Sept. 25, 2019, 4:18 p.m. UTC
Introduce a new interrupt-driven mechanism for managing the speed of the
memory controller. The interrupts are generated when the performance
counters overflow. The performance counters might track memory reads,
writes, transfers, page misses, etc. In the basic algorithm, tracking
read transfers and calculating memory pressure should be enough to skip
polling mode in devfreq.

Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
---
 drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
 1 file changed, 272 insertions(+), 25 deletions(-)

Comments

Krzysztof Kozlowski Sept. 27, 2019, 9:19 a.m. UTC | #1
On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
> Introduce a new interrupt-driven mechanism for managing the speed of the
> memory controller. The interrupts are generated when the performance
> counters overflow. The performance counters might track memory reads,
> writes, transfers, page misses, etc. In the basic algorithm, tracking
> read transfers and calculating memory pressure should be enough to skip
> polling mode in devfreq.
> 
> Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
> ---
>  drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
>  1 file changed, 272 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
> index 0fe5f2186139..86e1844b97ef 100644
> --- a/drivers/memory/samsung/exynos5422-dmc.c
> +++ b/drivers/memory/samsung/exynos5422-dmc.c
> @@ -8,6 +8,7 @@
>  #include <linux/devfreq.h>
>  #include <linux/devfreq-event.h>
>  #include <linux/device.h>
> +#include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/mfd/syscon.h>
>  #include <linux/module.h>
> @@ -35,6 +36,29 @@
>  #define USE_BPLL_TIMINGS			(0)
>  #define EXYNOS5_AREF_NORMAL			(0x2e)
>  
> +#define DREX_PPCCLKCON		(0x0130)
> +#define DREX_PEREV2CONFIG	(0x013c)
> +#define DREX_PMNC_PPC		(0xE000)
> +#define DREX_CNTENS_PPC		(0xE010)
> +#define DREX_CNTENC_PPC		(0xE020)
> +#define DREX_INTENS_PPC		(0xE030)
> +#define DREX_INTENC_PPC		(0xE040)
> +#define DREX_FLAG_PPC		(0xE050)
> +#define DREX_PMCNT2_PPC		(0xE130)
> +
> +#define CC_RESET		BIT(2)
> +#define PPC_COUNTER_RESET	BIT(1)
> +#define PPC_ENABLE		BIT(0)
> +#define PEREV_CLK_EN		BIT(0)
> +#define PERF_CNT2		BIT(2)
> +#define PERF_CCNT		BIT(31)

Describe to which registers these bitfields are applicable.

> +
> +#define READ_TRANSFER_CH0	(0x6d)
> +#define READ_TRANSFER_CH1	(0x6f)

The same. Otherwise they all look like some generic constants which is
not true.
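
Something along these lines would already help (just a sketch of the
grouping, inferred only from how the defines are used later in this
patch, so please double-check the register names against the TRM):

/* DREX_PMNC_PPC */
#define CC_RESET		BIT(2)
#define PPC_COUNTER_RESET	BIT(1)
#define PPC_ENABLE		BIT(0)

/* DREX_PPCCLKCON */
#define PEREV_CLK_EN		BIT(0)

/* DREX_CNTENS/CNTENC/INTENS/INTENC/FLAG_PPC */
#define PERF_CNT2		BIT(2)
#define PERF_CCNT		BIT(31)

/* DREX_PEREV2CONFIG: read transfer event selectors, one per channel */
#define READ_TRANSFER_CH0	(0x6d)
#define READ_TRANSFER_CH1	(0x6f)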

> +
> +#define PERF_COUNTER_START_VALUE 0xff000000
> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
> +
>  /**
>   * struct dmc_opp_table - Operating level desciption
>   *
> @@ -85,6 +109,9 @@ struct exynos5_dmc {
>  	struct clk *mout_mx_mspll_ccore_phy;
>  	struct devfreq_event_dev **counter;
>  	int num_counters;
> +	u64 last_overflow_ts[2];
> +	unsigned long load, total;

One member per line.  This decreases readability.
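
i.e. just:

	unsigned long load;
	unsigned long total;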

> +	bool in_irq_mode;
>  };
>  
>  #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
>  	return 0;
>  }
>  
> +/**
> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
> + * @dmc:	device for which the counters are going to be checked
> + * @beg_value:	initial value for the counter
> + *
> + * Function which enables needed counters, interrupts and sets initial values
> + * then starts the counters.
> + */
> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
> +					  u32 beg_value)
> +{
> +	/* Enable interrupts for counter 2 */
> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);

Blank line.

> +	/* Enable counter 2 and CCNT  */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);

Blank line.

> +	/* Clear overflow flag for all counters */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);

Blank line.

> +	/* Reset all counters */
> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line.

> +	/*
> +	 * Set start value for the counters, the number of samples that
> +	 * will be gathered is calculated as: 0xffffffff - beg_value
> +	 */
> +	writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
> +	writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);

Blank line.

> +	/* Start all counters */
> +	writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_calc() - Calculate utilization
> + * @dmc:	device for which the counters are going to be checked
> + * @diff_ts:	time between last interrupt and current one
> + *
> + * Function which calculates needed utilization for the devfreq governor.
> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
> + * between interrupts, which approximates utilization.
> + */
> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
> +{
> +	/*
> +	 * This is a simple algorithm for managing traffic on DMC.
> +	 * When there is almost no load the counters overflow every 4s,
> +	 * no matter the DMC frequency.
> +	 * The high load might be approximated using a linear function.
> +	 * Knowing that, simple calculation can provide 'busy_time' and
> +	 * 'total_time' to the devfreq governor which picks up target
> +	 * frequency.
> +	 * We want a fast ramp up and slow decay in frequency change function.
> +	 */
> +	if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
> +		/*
> +		 * Set higher utilization for the simple_ondemand governor.
> +		 * The governor should increase the frequency of the DMC.
> +		 */
> +		dmc->load = 70;
> +		dmc->total = 100;
> +	} else {
> +		/*
> +		 * Set low utilization for the simple_ondemand governor.
> +		 * The governor should decrease the frequency of the DMC.
> +		 */
> +		dmc->load = 35;
> +		dmc->total = 100;
> +	}
> +
> +	dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which is called from threaded IRQ to check the counters state
> + * and to call approximation for the needed utilization.
> + */
> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
> +{
> +	u32 val;
> +	u64 diff_ts, ts;
> +
> +	ts = ktime_get_ns();
> +
> +	/* Stop all counters */
> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
> +
> +	/* Check the source in interrupt flag registers (which channel) */
> +	val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
> +	if (val) {
> +		diff_ts = ts - dmc->last_overflow_ts[0];
> +		dmc->last_overflow_ts[0] = ts;
> +		dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n",  val);
> +	} else {
> +		val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
> +		diff_ts = ts - dmc->last_overflow_ts[1];
> +		dmc->last_overflow_ts[1] = ts;
> +		dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n",  val);
> +	}
> +
> +	exynos5_dmc_perf_events_calc(dmc, diff_ts);
> +
> +	exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +}
> +
> +/**
> + * exynos5_dmc_enable_perf_events() - Enable performance events
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which sets up the needed environment and enables the counters.
> + */
> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
> +{
> +	u64 ts;
> +
> +	/* Enable Performance Event Clock */
> +	writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
> +	writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
> +
> +	/* Select read transfers as performance event2 */
> +	writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
> +	writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
> +
> +	dmc->in_irq_mode = 1;

Move this outside, to the probe. Logically it belongs there.
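
Roughly (untested sketch), next to where the IRQ is actually requested in
probe:

	ret = devm_request_threaded_irq(dev, irq, NULL,
					dmc_irq_thread, IRQF_ONESHOT,
					dev_name(dev), dmc);
	if (ret) {
		dev_err(dev, "couldn't grab IRQ\n");
		goto remove_clocks;
	}

	dmc->in_irq_mode = true;

	exynos5_dmc_enable_perf_events(dmc);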

> +
> +	ts = ktime_get_ns();
> +	dmc->last_overflow_ts[0] = ts;
> +	dmc->last_overflow_ts[1] = ts;
> +
> +	/* Devfreq shouldn't be faster than initialization, play safe though. */
> +	dmc->load = 99;
> +	dmc->total = 100;
> +}
> +
> +/**
> + * exynos5_dmc_disable_perf_events() - Disable performance events
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which stops, disables performance event counters and interrupts.
> + */
> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
> +{
> +	/* Stop all counters */
> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line here and later.

> +	/* Disable interrupts for counter 2 */
> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
> +	/* Disable counter 2 and CCNT  */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
> +	/* Clear overflow flag for all counters */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
> +}
> +
>  /**
>   * exynos5_dmc_get_status() - Read current DMC performance statistics.
>   * @dev:	device for which the statistics are requested
> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
>  	unsigned long load, total;
>  	int ret;
>  
> -	ret = exynos5_counters_get(dmc, &load, &total);
> -	if (ret < 0)
> -		return -EINVAL;
> +	if (dmc->in_irq_mode) {
> +		stat->current_frequency = dmc->curr_rate;
> +		stat->busy_time = dmc->load;
> +		stat->total_time = dmc->total;
> +	} else {
> +		ret = exynos5_counters_get(dmc, &load, &total);
> +		if (ret < 0)
> +			return -EINVAL;
>  
> -	/* To protect from overflow in calculation ratios, divide by 1024 */
> -	stat->busy_time = load >> 10;
> -	stat->total_time = total >> 10;
> +		/* To protect from overflow, divide by 1024 */
> +		stat->busy_time = load >> 10;
> +		stat->total_time = total >> 10;
>  
> -	ret = exynos5_counters_set_event(dmc);
> -	if (ret < 0) {
> -		dev_err(dev, "could not set event counter\n");
> -		return ret;
> +		ret = exynos5_counters_set_event(dmc);
> +		if (ret < 0) {
> +			dev_err(dev, "could not set event counter\n");
> +			return ret;
> +		}
>  	}
>  
>  	return 0;
> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
>   * It provides to the devfreq framework needed functions and polling period.
>   */
>  static struct devfreq_dev_profile exynos5_dmc_df_profile = {
> -	.polling_ms = 500,
>  	.target = exynos5_dmc_target,
>  	.get_dev_status = exynos5_dmc_get_status,
>  	.get_cur_freq = exynos5_dmc_get_cur_freq,
> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
>  	return 0;
>  }
>  
> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
> +{
> +	int res;
> +	struct exynos5_dmc *dmc = priv;
> +
> +	dev_dbg(dmc->dev, "irq thread handler\n");

Skip a debug in thread handler for memory. It can pollute your log (I
guess depending on workload).

> +
> +	mutex_lock(&dmc->df->lock);
> +
> +	exynos5_dmc_perf_events_check(dmc);
> +
> +	res = update_devfreq(dmc->df);
> +	if (res)
> +		dev_err(dmc->dev, "devfreq failed with %d\n", res);

dev_warn()
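
i.e.:

	if (res)
		dev_warn(dmc->dev, "devfreq failed with %d\n", res);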

> +
> +	mutex_unlock(&dmc->df->lock);
> +
> +	return IRQ_HANDLED;
> +}
> +
>  /**
>   * exynos5_dmc_probe() - Probe function for the DMC driver
>   * @pdev:	platform device for which the driver is going to be initialized
> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  	struct device_node *np = dev->of_node;
>  	struct exynos5_dmc *dmc;
>  	struct resource *res;
> +	int irq;
>  
>  	dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
>  	if (!dmc)
> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  		goto remove_clocks;
>  	}
>  
> -	ret = exynos5_performance_counters_init(dmc);
> -	if (ret) {
> -		dev_warn(dev, "couldn't probe performance counters\n");
> -		goto remove_clocks;
> -	}
> -
>  	ret = exynos5_dmc_set_pause_on_switching(dmc);
>  	if (ret) {
>  		dev_warn(dev, "couldn't get access to PAUSE register\n");
>  		goto err_devfreq_add;

This is wrong now, I think.
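
At this point neither the IRQ nor the performance counters have been set
up yet, so presumably:

	ret = exynos5_dmc_set_pause_on_switching(dmc);
	if (ret) {
		dev_warn(dev, "couldn't get access to PAUSE register\n");
		goto remove_clocks;
	}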

>  	}
>  
> -	/*
> -	 * Setup default thresholds for the devfreq governor.
> -	 * The values are chosen based on experiments.
> -	 */
> -	dmc->gov_data.upthreshold = 30;
> -	dmc->gov_data.downdifferential = 5;
> +	/* There are two modes in which the driver works: polling or IRQ */
> +	irq = platform_get_irq(pdev, 0);

You need to document it in bindings.

> +	if (irq < 0) {
> +		ret = exynos5_performance_counters_init(dmc);
> +		if (ret) {
> +			dev_warn(dev, "couldn't probe performance counters\n");
> +			goto remove_clocks;

Weird, previous error jump goes to err_devfreq_add. This goes to error
label which is narrower (less to cleanup).

Best regards,
Krzysztof

> +		}
> +
> +		/*
> +		 * Setup default thresholds for the devfreq governor.
> +		 * The values are chosen based on experiments.
> +		 */
> +		dmc->gov_data.upthreshold = 30;
> +		dmc->gov_data.downdifferential = 5;
> +
> +		exynos5_dmc_df_profile.polling_ms = 500;
> +	} else {
> +		ret = devm_request_threaded_irq(dev, irq, NULL,
> +						dmc_irq_thread, IRQF_ONESHOT,
> +						dev_name(dev), dmc);
> +		if (ret) {
> +			dev_err(dev, "couldn't grab IRQ\n");
> +			goto remove_clocks;
> +		}
> +
> +		/*
> +		 * Setup default thresholds for the devfreq governor.
> +		 * The values are chosen based on experiments.
> +		 */
> +		dmc->gov_data.upthreshold = 55;
> +		dmc->gov_data.downdifferential = 5;
> +
> +		exynos5_dmc_enable_perf_events(dmc);
> +	}
> +
>  
>  	dmc->df = devm_devfreq_add_device(dev, &exynos5_dmc_df_profile,
>  					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
> @@ -1200,12 +1438,18 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  		goto err_devfreq_add;
>  	}
>  
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +
>  	dev_info(dev, "DMC initialized\n");
>  
>  	return 0;
>  
>  err_devfreq_add:
> -	exynos5_counters_disable_edev(dmc);
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_disable_perf_events(dmc);
> +	else
> +		exynos5_counters_disable_edev(dmc);
>  remove_clocks:
>  	clk_disable_unprepare(dmc->mout_bpll);
>  	clk_disable_unprepare(dmc->fout_bpll);
> @@ -1225,7 +1469,10 @@ static int exynos5_dmc_remove(struct platform_device *pdev)
>  {
>  	struct exynos5_dmc *dmc = dev_get_drvdata(&pdev->dev);
>  
> -	exynos5_counters_disable_edev(dmc);
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_disable_perf_events(dmc);
> +	else
> +		exynos5_counters_disable_edev(dmc);
>  
>  	clk_disable_unprepare(dmc->mout_bpll);
>  	clk_disable_unprepare(dmc->fout_bpll);
> -- 
> 2.17.1
>
Lukasz Luba Oct. 1, 2019, 10:11 a.m. UTC | #2
Hi Krzysztof,

On 9/27/19 11:19 AM, Krzysztof Kozlowski wrote:
> On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
>> Introduce a new interrupt-driven mechanism for managing the speed of the
>> memory controller. The interrupts are generated when the performance
>> counters overflow. The performance counters might track memory reads,
>> writes, transfers, page misses, etc. In the basic algorithm, tracking
>> read transfers and calculating memory pressure should be enough to skip
>> polling mode in devfreq.
>>
>> Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
>> ---
>>   drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
>>   1 file changed, 272 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
>> index 0fe5f2186139..86e1844b97ef 100644
>> --- a/drivers/memory/samsung/exynos5422-dmc.c
>> +++ b/drivers/memory/samsung/exynos5422-dmc.c
>> @@ -8,6 +8,7 @@
>>   #include <linux/devfreq.h>
>>   #include <linux/devfreq-event.h>
>>   #include <linux/device.h>
>> +#include <linux/interrupt.h>
>>   #include <linux/io.h>
>>   #include <linux/mfd/syscon.h>
>>   #include <linux/module.h>
>> @@ -35,6 +36,29 @@
>>   #define USE_BPLL_TIMINGS			(0)
>>   #define EXYNOS5_AREF_NORMAL			(0x2e)
>>   
>> +#define DREX_PPCCLKCON		(0x0130)
>> +#define DREX_PEREV2CONFIG	(0x013c)
>> +#define DREX_PMNC_PPC		(0xE000)
>> +#define DREX_CNTENS_PPC		(0xE010)
>> +#define DREX_CNTENC_PPC		(0xE020)
>> +#define DREX_INTENS_PPC		(0xE030)
>> +#define DREX_INTENC_PPC		(0xE040)
>> +#define DREX_FLAG_PPC		(0xE050)
>> +#define DREX_PMCNT2_PPC		(0xE130)
>> +
>> +#define CC_RESET		BIT(2)
>> +#define PPC_COUNTER_RESET	BIT(1)
>> +#define PPC_ENABLE		BIT(0)
>> +#define PEREV_CLK_EN		BIT(0)
>> +#define PERF_CNT2		BIT(2)
>> +#define PERF_CCNT		BIT(31)
> 
> Describe to which registers these bitfields are applicable.
OK, I will add comment above them.
> 
>> +
>> +#define READ_TRANSFER_CH0	(0x6d)
>> +#define READ_TRANSFER_CH1	(0x6f)
> 
> The same. Otherwise they all look like some generic constants which is
> not true.
Make sense, agree. I will add comment above them.
> 
>> +
>> +#define PERF_COUNTER_START_VALUE 0xff000000
>> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
>> +
>>   /**
>>    * struct dmc_opp_table - Operating level desciption
>>    *
>> @@ -85,6 +109,9 @@ struct exynos5_dmc {
>>   	struct clk *mout_mx_mspll_ccore_phy;
>>   	struct devfreq_event_dev **counter;
>>   	int num_counters;
>> +	u64 last_overflow_ts[2];
>> +	unsigned long load, total;
> 
> One member per line.  This decreases readability.
OK
> 
>> +	bool in_irq_mode;
>>   };
>>   
>>   #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
>> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
>>   	return 0;
>>   }
>>   
>> +/**
>> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
>> + * @dmc:	device for which the counters are going to be checked
>> + * @beg_value:	initial value for the counter
>> + *
>> + * Function which enables needed counters, interrupts and sets initial values
>> + * then starts the counters.
>> + */
>> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
>> +					  u32 beg_value)
>> +{
>> +	/* Enable interrupts for counter 2 */
>> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
>> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);
> 
> Blank line.
> 
>> +	/* Enable counter 2 and CCNT  */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);
> 
> Blank line.
> 
>> +	/* Clear overflow flag for all counters */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
> 
> Blank line.
> 
>> +	/* Reset all counters */
>> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);
> 
> Blank line.
> 
>> +	/*
>> +	 * Set start value for the counters, the number of samples that
>> +	 * will be gathered is calculated as: 0xffffffff - beg_value
>> +	 */
>> +	writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
>> +	writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);
> 
> Blank line.
> 
>> +	/* Start all counters */
>> +	writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_calc() - Calculate utilization
>> + * @dmc:	device for which the counters are going to be checked
>> + * @diff_ts:	time between last interrupt and current one
>> + *
>> + * Function which calculates needed utilization for the devfreq governor.
>> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
>> + * between interrupts, which approximates utilization.
>> + */
>> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
>> +{
>> +	/*
>> +	 * This is a simple algorithm for managing traffic on DMC.
>> +	 * When there is almost no load the counters overflow every 4s,
>> +	 * no matter the DMC frequency.
>> +	 * The high load might be approximated using a linear function.
>> +	 * Knowing that, simple calculation can provide 'busy_time' and
>> +	 * 'total_time' to the devfreq governor which picks up target
>> +	 * frequency.
>> +	 * We want a fast ramp up and slow decay in frequency change function.
>> +	 */
>> +	if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
>> +		/*
>> +		 * Set higher utilization for the simple_ondemand governor.
>> +		 * The governor should increase the frequency of the DMC.
>> +		 */
>> +		dmc->load = 70;
>> +		dmc->total = 100;
>> +	} else {
>> +		/*
>> +		 * Set low utilization for the simple_ondemand governor.
>> +		 * The governor should decrease the frequency of the DMC.
>> +		 */
>> +		dmc->load = 35;
>> +		dmc->total = 100;
>> +	}
>> +
>> +	dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which is called from threaded IRQ to check the counters state
>> + * and to call approximation for the needed utilization.
>> + */
>> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
>> +{
>> +	u32 val;
>> +	u64 diff_ts, ts;
>> +
>> +	ts = ktime_get_ns();
>> +
>> +	/* Stop all counters */
>> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +
>> +	/* Check the source in interrupt flag registers (which channel) */
>> +	val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	if (val) {
>> +		diff_ts = ts - dmc->last_overflow_ts[0];
>> +		dmc->last_overflow_ts[0] = ts;
>> +		dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n",  val);
>> +	} else {
>> +		val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
>> +		diff_ts = ts - dmc->last_overflow_ts[1];
>> +		dmc->last_overflow_ts[1] = ts;
>> +		dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n",  val);
>> +	}
>> +
>> +	exynos5_dmc_perf_events_calc(dmc, diff_ts);
>> +
>> +	exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_enable_perf_events() - Enable performance events
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which sets up the needed environment and enables the counters.
>> + */
>> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> +	u64 ts;
>> +
>> +	/* Enable Performance Event Clock */
>> +	writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
>> +	writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
>> +
>> +	/* Select read transfers as performance event2 */
>> +	writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
>> +	writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
>> +
>> +	dmc->in_irq_mode = 1;
> 
> Move this outside, to the probe. Logically it belongs there.
OK
> 
>> +
>> +	ts = ktime_get_ns();
>> +	dmc->last_overflow_ts[0] = ts;
>> +	dmc->last_overflow_ts[1] = ts;
>> +
>> +	/* Devfreq shouldn't be faster than initialization, play safe though. */
>> +	dmc->load = 99;
>> +	dmc->total = 100;
>> +}
>> +
>> +/**
>> + * exynos5_dmc_disable_perf_events() - Disable performance events
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which stops, disables performance event counters and interrupts.
>> + */
>> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> +	/* Stop all counters */
>> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
> 
> Blank line here and later.
OK - for all the 'blank line' hints.
> 
>> +	/* Disable interrupts for counter 2 */
>> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
>> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
>> +	/* Disable counter 2 and CCNT  */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
>> +	/* Clear overflow flag for all counters */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
>> +}
>> +
>>   /**
>>    * exynos5_dmc_get_status() - Read current DMC performance statistics.
>>    * @dev:	device for which the statistics are requested
>> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
>>   	unsigned long load, total;
>>   	int ret;
>>   
>> -	ret = exynos5_counters_get(dmc, &load, &total);
>> -	if (ret < 0)
>> -		return -EINVAL;
>> +	if (dmc->in_irq_mode) {
>> +		stat->current_frequency = dmc->curr_rate;
>> +		stat->busy_time = dmc->load;
>> +		stat->total_time = dmc->total;
>> +	} else {
>> +		ret = exynos5_counters_get(dmc, &load, &total);
>> +		if (ret < 0)
>> +			return -EINVAL;
>>   
>> -	/* To protect from overflow in calculation ratios, divide by 1024 */
>> -	stat->busy_time = load >> 10;
>> -	stat->total_time = total >> 10;
>> +		/* To protect from overflow, divide by 1024 */
>> +		stat->busy_time = load >> 10;
>> +		stat->total_time = total >> 10;
>>   
>> -	ret = exynos5_counters_set_event(dmc);
>> -	if (ret < 0) {
>> -		dev_err(dev, "could not set event counter\n");
>> -		return ret;
>> +		ret = exynos5_counters_set_event(dmc);
>> +		if (ret < 0) {
>> +			dev_err(dev, "could not set event counter\n");
>> +			return ret;
>> +		}
>>   	}
>>   
>>   	return 0;
>> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
>>    * It provides to the devfreq framework needed functions and polling period.
>>    */
>>   static struct devfreq_dev_profile exynos5_dmc_df_profile = {
>> -	.polling_ms = 500,
>>   	.target = exynos5_dmc_target,
>>   	.get_dev_status = exynos5_dmc_get_status,
>>   	.get_cur_freq = exynos5_dmc_get_cur_freq,
>> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
>>   	return 0;
>>   }
>>   
>> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
>> +{
>> +	int res;
>> +	struct exynos5_dmc *dmc = priv;
>> +
>> +	dev_dbg(dmc->dev, "irq thread handler\n");
> 
> Skip a debug in thread handler for memory. It can pollute your log (I
> guess depending on workload).
OK, I will remove it.
> 
>> +
>> +	mutex_lock(&dmc->df->lock);
>> +
>> +	exynos5_dmc_perf_events_check(dmc);
>> +
>> +	res = update_devfreq(dmc->df);
>> +	if (res)
>> +		dev_err(dmc->dev, "devfreq failed with %d\n", res);
> 
> dev_warn()
OK
> 
>> +
>> +	mutex_unlock(&dmc->df->lock);
>> +
>> +	return IRQ_HANDLED;
>> +}
>> +
>>   /**
>>    * exynos5_dmc_probe() - Probe function for the DMC driver
>>    * @pdev:	platform device for which the driver is going to be initialized
>> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>>   	struct device_node *np = dev->of_node;
>>   	struct exynos5_dmc *dmc;
>>   	struct resource *res;
>> +	int irq;
>>   
>>   	dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
>>   	if (!dmc)
>> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>>   		goto remove_clocks;
>>   	}
>>   
>> -	ret = exynos5_performance_counters_init(dmc);
>> -	if (ret) {
>> -		dev_warn(dev, "couldn't probe performance counters\n");
>> -		goto remove_clocks;
>> -	}
>> -
>>   	ret = exynos5_dmc_set_pause_on_switching(dmc);
>>   	if (ret) {
>>   		dev_warn(dev, "couldn't get access to PAUSE register\n");
>>   		goto err_devfreq_add;
> 
> This is wrong now, I think.
Good point, should be 'goto remove_clocks'.
> 
>>   	}
>>   
>> -	/*
>> -	 * Setup default thresholds for the devfreq governor.
>> -	 * The values are chosen based on experiments.
>> -	 */
>> -	dmc->gov_data.upthreshold = 30;
>> -	dmc->gov_data.downdifferential = 5;
>> +	/* There are two modes in which the driver works: polling or IRQ */
>> +	irq = platform_get_irq(pdev, 0);
> 
> You need to document it in bindings.
OK
> 
>> +	if (irq < 0) {
>> +		ret = exynos5_performance_counters_init(dmc);
>> +		if (ret) {
>> +			dev_warn(dev, "couldn't probe performance counters\n");
>> +			goto remove_clocks;
> 
> Weird, previous error jump goes to err_devfreq_add. This goes to error
> label which is narrower (less to cleanup).

Right.

Thank you for the review. I will address the issues in the next version.

Regards,
Lukasz

Patch

diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
index 0fe5f2186139..86e1844b97ef 100644
--- a/drivers/memory/samsung/exynos5422-dmc.c
+++ b/drivers/memory/samsung/exynos5422-dmc.c
@@ -8,6 +8,7 @@ 
 #include <linux/devfreq.h>
 #include <linux/devfreq-event.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
@@ -35,6 +36,29 @@ 
 #define USE_BPLL_TIMINGS			(0)
 #define EXYNOS5_AREF_NORMAL			(0x2e)
 
+#define DREX_PPCCLKCON		(0x0130)
+#define DREX_PEREV2CONFIG	(0x013c)
+#define DREX_PMNC_PPC		(0xE000)
+#define DREX_CNTENS_PPC		(0xE010)
+#define DREX_CNTENC_PPC		(0xE020)
+#define DREX_INTENS_PPC		(0xE030)
+#define DREX_INTENC_PPC		(0xE040)
+#define DREX_FLAG_PPC		(0xE050)
+#define DREX_PMCNT2_PPC		(0xE130)
+
+#define CC_RESET		BIT(2)
+#define PPC_COUNTER_RESET	BIT(1)
+#define PPC_ENABLE		BIT(0)
+#define PEREV_CLK_EN		BIT(0)
+#define PERF_CNT2		BIT(2)
+#define PERF_CCNT		BIT(31)
+
+#define READ_TRANSFER_CH0	(0x6d)
+#define READ_TRANSFER_CH1	(0x6f)
+
+#define PERF_COUNTER_START_VALUE 0xff000000
+#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
+
 /**
  * struct dmc_opp_table - Operating level desciption
  *
@@ -85,6 +109,9 @@  struct exynos5_dmc {
 	struct clk *mout_mx_mspll_ccore_phy;
 	struct devfreq_event_dev **counter;
 	int num_counters;
+	u64 last_overflow_ts[2];
+	unsigned long load, total;
+	bool in_irq_mode;
 };
 
 #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
@@ -653,6 +680,167 @@  static int exynos5_counters_get(struct exynos5_dmc *dmc,
 	return 0;
 }
 
+/**
+ * exynos5_dmc_start_perf_events() - Setup and start performance event counters
+ * @dmc:	device for which the counters are going to be checked
+ * @beg_value:	initial value for the counter
+ *
+ * Function which enables needed counters, interrupts and sets initial values
+ * then starts the counters.
+ */
+static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
+					  u32 beg_value)
+{
+	/* Enable interrupts for counter 2 */
+	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
+	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);
+	/* Enable counter 2 and CCNT  */
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);
+	/* Clear overflow flag for all counters */
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
+	/* Reset all counters */
+	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
+	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);
+	/*
+	 * Set start value for the counters, the number of samples that
+	 * will be gathered is calculated as: 0xffffffff - beg_value
+	 */
+	writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
+	writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);
+	/* Start all counters */
+	writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
+	writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
+}
+
+/**
+ * exynos5_dmc_perf_events_calc() - Calculate utilization
+ * @dmc:	device for which the counters are going to be checked
+ * @diff_ts:	time between last interrupt and current one
+ *
+ * Function which calculates needed utilization for the devfreq governor.
+ * It prepares values for 'busy_time' and 'total_time' based on elapsed time
+ * between interrupts, which approximates utilization.
+ */
+static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
+{
+	/*
+	 * This is a simple algorithm for managing traffic on DMC.
+	 * When there is almost no load the counters overflow every 4s,
+	 * no matter the DMC frequency.
+	 * The high load might be approximated using a linear function.
+	 * Knowing that, simple calculation can provide 'busy_time' and
+	 * 'total_time' to the devfreq governor which picks up target
+	 * frequency.
+	 * We want a fast ramp up and slow decay in frequency change function.
+	 */
+	if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
+		/*
+		 * Set higher utilization for the simple_ondemand governor.
+		 * The governor should increase the frequency of the DMC.
+		 */
+		dmc->load = 70;
+		dmc->total = 100;
+	} else {
+		/*
+		 * Set low utilization for the simple_ondemand governor.
+		 * The governor should decrease the frequency of the DMC.
+		 */
+		dmc->load = 35;
+		dmc->total = 100;
+	}
+
+	dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
+}
+
+/**
+ * exynos5_dmc_perf_events_check() - Checks the status of the counters
+ * @dmc:	device for which the counters are going to be checked
+ *
+ * Function which is called from threaded IRQ to check the counters state
+ * and to call approximation for the needed utilization.
+ */
+static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
+{
+	u32 val;
+	u64 diff_ts, ts;
+
+	ts = ktime_get_ns();
+
+	/* Stop all counters */
+	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
+	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
+
+	/* Check the source in interrupt flag registers (which channel) */
+	val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
+	if (val) {
+		diff_ts = ts - dmc->last_overflow_ts[0];
+		dmc->last_overflow_ts[0] = ts;
+		dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n",  val);
+	} else {
+		val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
+		diff_ts = ts - dmc->last_overflow_ts[1];
+		dmc->last_overflow_ts[1] = ts;
+		dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n",  val);
+	}
+
+	exynos5_dmc_perf_events_calc(dmc, diff_ts);
+
+	exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
+}
+
+/**
+ * exynos5_dmc_enable_perf_events() - Enable performance events
+ * @dmc:	device for which the counters are going to be checked
+ *
+ * Function which sets up the needed environment and enables the counters.
+ */
+static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
+{
+	u64 ts;
+
+	/* Enable Performance Event Clock */
+	writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
+	writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
+
+	/* Select read transfers as performance event2 */
+	writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
+	writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
+
+	dmc->in_irq_mode = 1;
+
+	ts = ktime_get_ns();
+	dmc->last_overflow_ts[0] = ts;
+	dmc->last_overflow_ts[1] = ts;
+
+	/* Devfreq shouldn't be faster than initialization, play safe though. */
+	dmc->load = 99;
+	dmc->total = 100;
+}
+
+/**
+ * exynos5_dmc_disable_perf_events() - Disable performance events
+ * @dmc:	device for which the counters are going to be checked
+ *
+ * Function which stops, disables performance event counters and interrupts.
+ */
+static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
+{
+	/* Stop all counters */
+	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
+	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
+	/* Disable interrupts for counter 2 */
+	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
+	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
+	/* Disable counter 2 and CCNT  */
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
+	/* Clear overflow flag for all counters */
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
+	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
+}
+
 /**
  * exynos5_dmc_get_status() - Read current DMC performance statistics.
  * @dev:	device for which the statistics are requested
@@ -669,18 +857,24 @@  static int exynos5_dmc_get_status(struct device *dev,
 	unsigned long load, total;
 	int ret;
 
-	ret = exynos5_counters_get(dmc, &load, &total);
-	if (ret < 0)
-		return -EINVAL;
+	if (dmc->in_irq_mode) {
+		stat->current_frequency = dmc->curr_rate;
+		stat->busy_time = dmc->load;
+		stat->total_time = dmc->total;
+	} else {
+		ret = exynos5_counters_get(dmc, &load, &total);
+		if (ret < 0)
+			return -EINVAL;
 
-	/* To protect from overflow in calculation ratios, divide by 1024 */
-	stat->busy_time = load >> 10;
-	stat->total_time = total >> 10;
+		/* To protect from overflow, divide by 1024 */
+		stat->busy_time = load >> 10;
+		stat->total_time = total >> 10;
 
-	ret = exynos5_counters_set_event(dmc);
-	if (ret < 0) {
-		dev_err(dev, "could not set event counter\n");
-		return ret;
+		ret = exynos5_counters_set_event(dmc);
+		if (ret < 0) {
+			dev_err(dev, "could not set event counter\n");
+			return ret;
+		}
 	}
 
 	return 0;
@@ -712,7 +906,6 @@  static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
  * It provides to the devfreq framework needed functions and polling period.
  */
 static struct devfreq_dev_profile exynos5_dmc_df_profile = {
-	.polling_ms = 500,
 	.target = exynos5_dmc_target,
 	.get_dev_status = exynos5_dmc_get_status,
 	.get_cur_freq = exynos5_dmc_get_cur_freq,
@@ -1108,6 +1301,26 @@  static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
 	return 0;
 }
 
+static irqreturn_t dmc_irq_thread(int irq, void *priv)
+{
+	int res;
+	struct exynos5_dmc *dmc = priv;
+
+	dev_dbg(dmc->dev, "irq thread handler\n");
+
+	mutex_lock(&dmc->df->lock);
+
+	exynos5_dmc_perf_events_check(dmc);
+
+	res = update_devfreq(dmc->df);
+	if (res)
+		dev_err(dmc->dev, "devfreq failed with %d\n", res);
+
+	mutex_unlock(&dmc->df->lock);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * exynos5_dmc_probe() - Probe function for the DMC driver
  * @pdev:	platform device for which the driver is going to be initialized
@@ -1125,6 +1338,7 @@  static int exynos5_dmc_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	struct exynos5_dmc *dmc;
 	struct resource *res;
+	int irq;
 
 	dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
 	if (!dmc)
@@ -1172,24 +1386,48 @@  static int exynos5_dmc_probe(struct platform_device *pdev)
 		goto remove_clocks;
 	}
 
-	ret = exynos5_performance_counters_init(dmc);
-	if (ret) {
-		dev_warn(dev, "couldn't probe performance counters\n");
-		goto remove_clocks;
-	}
-
 	ret = exynos5_dmc_set_pause_on_switching(dmc);
 	if (ret) {
 		dev_warn(dev, "couldn't get access to PAUSE register\n");
 		goto err_devfreq_add;
 	}
 
-	/*
-	 * Setup default thresholds for the devfreq governor.
-	 * The values are chosen based on experiments.
-	 */
-	dmc->gov_data.upthreshold = 30;
-	dmc->gov_data.downdifferential = 5;
+	/* There are two modes in which the driver works: polling or IRQ */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = exynos5_performance_counters_init(dmc);
+		if (ret) {
+			dev_warn(dev, "couldn't probe performance counters\n");
+			goto remove_clocks;
+		}
+
+		/*
+		 * Setup default thresholds for the devfreq governor.
+		 * The values are chosen based on experiments.
+		 */
+		dmc->gov_data.upthreshold = 30;
+		dmc->gov_data.downdifferential = 5;
+
+		exynos5_dmc_df_profile.polling_ms = 500;
+	} else {
+		ret = devm_request_threaded_irq(dev, irq, NULL,
+						dmc_irq_thread, IRQF_ONESHOT,
+						dev_name(dev), dmc);
+		if (ret) {
+			dev_err(dev, "couldn't grab IRQ\n");
+			goto remove_clocks;
+		}
+
+		/*
+		 * Setup default thresholds for the devfreq governor.
+		 * The values are chosen based on experiments.
+		 */
+		dmc->gov_data.upthreshold = 55;
+		dmc->gov_data.downdifferential = 5;
+
+		exynos5_dmc_enable_perf_events(dmc);
+	}
+
 
 	dmc->df = devm_devfreq_add_device(dev, &exynos5_dmc_df_profile,
 					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
@@ -1200,12 +1438,18 @@  static int exynos5_dmc_probe(struct platform_device *pdev)
 		goto err_devfreq_add;
 	}
 
+	if (dmc->in_irq_mode)
+		exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
+
 	dev_info(dev, "DMC initialized\n");
 
 	return 0;
 
 err_devfreq_add:
-	exynos5_counters_disable_edev(dmc);
+	if (dmc->in_irq_mode)
+		exynos5_dmc_disable_perf_events(dmc);
+	else
+		exynos5_counters_disable_edev(dmc);
 remove_clocks:
 	clk_disable_unprepare(dmc->mout_bpll);
 	clk_disable_unprepare(dmc->fout_bpll);
@@ -1225,7 +1469,10 @@  static int exynos5_dmc_remove(struct platform_device *pdev)
 {
 	struct exynos5_dmc *dmc = dev_get_drvdata(&pdev->dev);
 
-	exynos5_counters_disable_edev(dmc);
+	if (dmc->in_irq_mode)
+		exynos5_dmc_disable_perf_events(dmc);
+	else
+		exynos5_counters_disable_edev(dmc);
 
 	clk_disable_unprepare(dmc->mout_bpll);
 	clk_disable_unprepare(dmc->fout_bpll);