diff mbox series

[2/2] media: pwm-ir-tx: trigger edges from hrtimer interrupt context

Message ID 7efe4229514001b835fa70d51973cd3306dc0b04.1696156485.git.sean@mess.org (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Sean Young Oct. 1, 2023, 10:40 a.m. UTC
The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
from delays as this is done in process context. Make this work in atomic
context.

This makes the driver much more precise.

Signed-off-by: Sean Young <sean@mess.org>
Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
 1 file changed, 63 insertions(+), 16 deletions(-)

Comments

Ivaylo Dimitrov Oct. 2, 2023, 5:49 a.m. UTC | #1
Hi,

On 1.10.23 г. 13:40 ч., Sean Young wrote:
> The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> from delays as this is done in process context. Make this work in atomic
> context.
> 
> This makes the driver much more precise.
> 
> Signed-off-by: Sean Young <sean@mess.org>
> Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> ---
>   drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
>   1 file changed, 63 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> index c5f37c03af9c..557725a07a67 100644
> --- a/drivers/media/rc/pwm-ir-tx.c
> +++ b/drivers/media/rc/pwm-ir-tx.c
> @@ -10,6 +10,8 @@
>   #include <linux/slab.h>
>   #include <linux/of.h>
>   #include <linux/platform_device.h>
> +#include <linux/hrtimer.h>
> +#include <linux/completion.h>
>   #include <media/rc-core.h>
>   
>   #define DRIVER_NAME	"pwm-ir-tx"
> @@ -17,8 +19,13 @@
>   
>   struct pwm_ir {
>   	struct pwm_device *pwm;
> -	unsigned int carrier;
> -	unsigned int duty_cycle;
> +	struct hrtimer timer;
> +	struct completion completion;
> +	uint carrier;
> +	uint duty_cycle;
> +	uint *txbuf;
> +	uint txbuf_len;
> +	uint txbuf_index;
>   };
>   
>   static const struct of_device_id pwm_ir_of_match[] = {
> @@ -55,33 +62,65 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>   	struct pwm_ir *pwm_ir = dev->priv;
>   	struct pwm_device *pwm = pwm_ir->pwm;
>   	struct pwm_state state;
> -	int i;
> -	ktime_t edge;
> -	long delta;
> +
> +	reinit_completion(&pwm_ir->completion);

You should not need that.

>   
>   	pwm_init_state(pwm, &state);
>   
>   	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
>   	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
> +	state.enabled = false;
>   
> -	edge = ktime_get();
> +	pwm_ir->txbuf = txbuf;
> +	pwm_ir->txbuf_len = count;
> +	pwm_ir->txbuf_index = 0;
>   
> -	for (i = 0; i < count; i++) {
> -		state.enabled = !(i % 2);
> -		pwm_apply_state(pwm, &state);
> +	pwm_apply_state(pwm, &state);
>   

ditto, first pwm control should be in the timer function

> -		edge = ktime_add_us(edge, txbuf[i]);
> -		delta = ktime_us_delta(edge, ktime_get());
> -		if (delta > 0)
> -			usleep_range(delta, delta + 10);
> -	}
> +	hrtimer_start(&pwm_ir->timer, 1000, HRTIMER_MODE_REL);
>   

why not just call it with 0 time?

> -	state.enabled = false;
> -	pwm_apply_state(pwm, &state);
> +	wait_for_completion(&pwm_ir->completion);
>   
>   	return count;
>   }
>   
> +static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
> +{
> +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> +	ktime_t now;
> +
> +	/*
> +	 * If we happen to hit an odd latency spike, loop through the
> +	 * pulses until we catch up.
> +	 */
> +	do {
> +		u64 ns;
> +
> +		if (pwm_ir->txbuf_index >= pwm_ir->txbuf_len) {
> +			/* Stop TX here */
> +			pwm_disable(pwm_ir->pwm);
> +
> +			complete(&pwm_ir->completion);
> +
> +			return HRTIMER_NORESTART;
> +		}
> +
> +		if (pwm_ir->txbuf_index % 2)
> +			pwm_disable(pwm_ir->pwm);
> +		else
> +			pwm_enable(pwm_ir->pwm);
> +

pwm_ir->pwm->state.enabled = !(pwm_ir->txbuf_index % 2);
pwm_apply_state(pwm_ir->pwm, pwm_ir->state);

> +		ns = US_TO_NS(pwm_ir->txbuf[pwm_ir->txbuf_index]);
> +		hrtimer_add_expires_ns(timer, ns);
> +
> +		pwm_ir->txbuf_index++;
> +
> +		now = timer->base->get_time();
> +	} while (hrtimer_get_expires_tv64(timer) < now);
> +
> +	return HRTIMER_RESTART;
> +}
> +
>   static int pwm_ir_probe(struct platform_device *pdev)
>   {
>   	struct pwm_ir *pwm_ir;
> @@ -96,8 +135,16 @@ static int pwm_ir_probe(struct platform_device *pdev)
>   	if (IS_ERR(pwm_ir->pwm))
>   		return PTR_ERR(pwm_ir->pwm);
>   
> +	if (pwm_can_sleep(pwm_ir->pwm)) {
> +		dev_err(&pdev->dev, "unsupported pwm device: driver can sleep\n");
> +		return -ENODEV;
> +	}
> +

I think we shall not limit, but use high priority thread to support 
those drivers. I have that working on n900 with current (sleeping) pwm, 
see my reply on the other mail. Maybe we can combine both patches in a 
way to support both atomic and sleeping pwm drivers.

>   	pwm_ir->carrier = 38000;
>   	pwm_ir->duty_cycle = 50;
> +	init_completion(&pwm_ir->completion);
> +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	pwm_ir->timer.function = pwm_ir_timer;
>   
>   	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
>   	if (!rcdev)
> 

Regards,
Ivo
Ivaylo Dimitrov Oct. 2, 2023, 6:16 a.m. UTC | #2
Hi,

On 1.10.23 г. 13:40 ч., Sean Young wrote:
> The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> from delays as this is done in process context. Make this work in atomic
> context.
> 
> This makes the driver much more precise.
> 
> Signed-off-by: Sean Young <sean@mess.org>
> Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> ---
>   drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
>   1 file changed, 63 insertions(+), 16 deletions(-)
> 

what about the following patch(not a proper one, just RFC)? It achieves 
the same (if not better) precision (on n900 at least) without using 
atomic pwm. What it does is: create a fifo thread in which we swicth pwm 
on/off, start hrtimer that is used to signal thread when to switch pwm.
As signal comes earlier than needed(because hrtimer runs async to the 
thread), we do a busy loop wait for the precise time to switch the pwm. 
At least on n900, this busy loop is less than 200 us per switch(worst 
case, usually it is less than 100 us). That way, even if we have some 
latency spike, it is covered by not doing busy loop for that particular 
pwm switch and we keep the precise timing.

Maybe we shall merge both patches so fifo thread to be used for sleeping 
pwms and hrtimer for atomic. I can do that and test it here if you think 
that approach makes sense.

Regards,
Ivo

PS: I have a patch that converts timer-ti-dm to non-sleeping one, will 
send it when it comes to it.

diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
index 105a9c24f1e3..e8b620f53056 100644
--- a/drivers/media/rc/pwm-ir-tx.c
+++ b/drivers/media/rc/pwm-ir-tx.c
@@ -4,6 +4,7 @@
   */

  #include <linux/kernel.h>
+#include <linux/kthread.h>
  #include <linux/module.h>
  #include <linux/pwm.h>
  #include <linux/delay.h>
@@ -17,8 +18,16 @@

  struct pwm_ir {
  	struct pwm_device *pwm;
+	struct hrtimer timer;
+	struct task_struct *tx_thread;
+	wait_queue_head_t tx_wq;
+	struct completion tx_done;
+	struct completion edge;
  	unsigned int carrier;
  	unsigned int duty_cycle;
+	unsigned int *txbuf;
+	unsigned int count;
+	unsigned int index;
  };

  static const struct of_device_id pwm_ir_of_match[] = {
@@ -48,35 +57,103 @@ static int pwm_ir_set_carrier(struct rc_dev *dev, 
u32 carrier)
  	return 0;
  }

-static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
-		     unsigned int count)
+static enum hrtimer_restart pwm_ir_timer_cb(struct hrtimer *timer)
+{
+	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
+	ktime_t now;
+
+	/*
+	 * If we happen to hit an odd latency spike, loop through the
+	 * pulses until we catch up.
+	 */
+	do {
+		u64 edge;
+
+		if (pwm_ir->index >= pwm_ir->count)
+			goto out;
+
+		complete(&pwm_ir->edge);
+
+		edge = US_TO_NS(pwm_ir->txbuf[pwm_ir->index]);
+		hrtimer_add_expires_ns(timer, edge);
+
+		pwm_ir->index++;
+
+		now = timer->base->get_time();
+
+	} while (hrtimer_get_expires_tv64(timer) < now);
+
+	return HRTIMER_RESTART;
+out:
+	complete(&pwm_ir->edge);
+
+	return HRTIMER_NORESTART;
+}
+
+static void _pwm_ir_tx(struct pwm_ir *pwm_ir)
  {
-	struct pwm_ir *pwm_ir = dev->priv;
-	struct pwm_device *pwm = pwm_ir->pwm;
  	struct pwm_state state;
  	int i;
  	ktime_t edge;
  	long delta;

-	pwm_init_state(pwm, &state);
+	pwm_init_state(pwm_ir->pwm, &state);

  	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
  	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);

+	hrtimer_start(&pwm_ir->timer, 0, HRTIMER_MODE_REL);
+	wait_for_completion(&pwm_ir->edge);
  	edge = ktime_get();

-	for (i = 0; i < count; i++) {
+	for (i = 0; i < pwm_ir->count; i++) {
  		state.enabled = !(i % 2);
-		pwm_apply_state(pwm, &state);
+		pwm_apply_state(pwm_ir->pwm, &state);
+
+		edge = ktime_add_us(edge, pwm_ir->txbuf[i]);
+		wait_for_completion(&pwm_ir->edge);

-		edge = ktime_add_us(edge, txbuf[i]);
  		delta = ktime_us_delta(edge, ktime_get());
+
  		if (delta > 0)
-			usleep_range(delta, delta + 10);
+			udelay(delta);
  	}

  	state.enabled = false;
-	pwm_apply_state(pwm, &state);
+	pwm_apply_state(pwm_ir->pwm, &state);
+
+	pwm_ir->count = 0;
+}
+
+static int pwm_ir_thread(void *data)
+{
+	struct pwm_ir *pwm_ir = data;
+
+	for (;;) {
+		wait_event_idle(pwm_ir->tx_wq,
+				kthread_should_stop() || pwm_ir->count);
+
+		if (kthread_should_stop())
+			break;
+
+		_pwm_ir_tx(pwm_ir);
+		complete(&pwm_ir->tx_done);
+	}
+
+	return 0;
+}
+
+static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
+		     unsigned int count)
+{
+	struct pwm_ir *pwm_ir = dev->priv;
+
+	pwm_ir->txbuf = txbuf;
+	pwm_ir->count = count;
+	pwm_ir->index = 0;
+
+	wake_up(&pwm_ir->tx_wq);
+	wait_for_completion(&pwm_ir->tx_done);

  	return count;
  }
@@ -91,12 +168,24 @@ static int pwm_ir_probe(struct platform_device *pdev)
  	if (!pwm_ir)
  		return -ENOMEM;

+	platform_set_drvdata(pdev, pwm_ir);
+
+	pwm_ir->count = 0;
+
  	pwm_ir->pwm = devm_pwm_get(&pdev->dev, NULL);
  	if (IS_ERR(pwm_ir->pwm))
  		return PTR_ERR(pwm_ir->pwm);

-	pwm_ir->carrier = 38000;
+	init_waitqueue_head(&pwm_ir->tx_wq);
+	init_completion(&pwm_ir->edge);
+	init_completion(&pwm_ir->tx_done);
+
+	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pwm_ir->timer.function = pwm_ir_timer_cb;
+
  	pwm_ir->duty_cycle = 50;
+	pwm_ir->carrier = DIV_ROUND_CLOSEST_ULL(pwm_get_period(pwm_ir->pwm),
+						NSEC_PER_SEC);

  	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
  	if (!rcdev)
@@ -109,15 +198,38 @@ static int pwm_ir_probe(struct platform_device *pdev)
  	rcdev->s_tx_duty_cycle = pwm_ir_set_duty_cycle;
  	rcdev->s_tx_carrier = pwm_ir_set_carrier;

+	pwm_ir->tx_thread = kthread_create(pwm_ir_thread, pwm_ir, "%s/tx",
+					   dev_name(&pdev->dev));
+	if (IS_ERR(pwm_ir->tx_thread))
+		return PTR_ERR(pwm_ir->tx_thread);
+
+	sched_set_fifo(pwm_ir->tx_thread);
+	wake_up_process(pwm_ir->tx_thread);
+
  	rc = devm_rc_register_device(&pdev->dev, rcdev);
-	if (rc < 0)
+	if (rc < 0) {
+		kthread_stop(pwm_ir->tx_thread);
  		dev_err(&pdev->dev, "failed to register rc device\n");
+	}

  	return rc;
  }

+static int pwm_ir_remove(struct platform_device *pdev)
+{
+	struct pwm_ir *pwm_ir = platform_get_drvdata(pdev);
+
+	if (pwm_ir->tx_thread) {
+		kthread_stop(pwm_ir->tx_thread);
+		pwm_ir->tx_thread = NULL;
+	}
+
+	return 0;
+}
+
  static struct platform_driver pwm_ir_driver = {
  	.probe = pwm_ir_probe,
+	.remove = pwm_ir_remove,
  	.driver = {
  		.name	= DRIVER_NAME,
  		.of_match_table = of_match_ptr(pwm_ir_of_match),
Sean Young Oct. 2, 2023, 8:20 a.m. UTC | #3
Hi,

On Mon, Oct 02, 2023 at 08:49:47AM +0300, Ivaylo Dimitrov wrote:
> On 1.10.23 г. 13:40 ч., Sean Young wrote:
> > The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> > from delays as this is done in process context. Make this work in atomic
> > context.
> > 
> > This makes the driver much more precise.
> > 
> > Signed-off-by: Sean Young <sean@mess.org>
> > Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> > ---
> >   drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
> >   1 file changed, 63 insertions(+), 16 deletions(-)
> > 
> > diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> > index c5f37c03af9c..557725a07a67 100644
> > --- a/drivers/media/rc/pwm-ir-tx.c
> > +++ b/drivers/media/rc/pwm-ir-tx.c
> > @@ -10,6 +10,8 @@
> >   #include <linux/slab.h>
> >   #include <linux/of.h>
> >   #include <linux/platform_device.h>
> > +#include <linux/hrtimer.h>
> > +#include <linux/completion.h>
> >   #include <media/rc-core.h>
> >   #define DRIVER_NAME	"pwm-ir-tx"
> > @@ -17,8 +19,13 @@
> >   struct pwm_ir {
> >   	struct pwm_device *pwm;
> > -	unsigned int carrier;
> > -	unsigned int duty_cycle;
> > +	struct hrtimer timer;
> > +	struct completion completion;
> > +	uint carrier;
> > +	uint duty_cycle;
> > +	uint *txbuf;
> > +	uint txbuf_len;
> > +	uint txbuf_index;
> >   };
> >   static const struct of_device_id pwm_ir_of_match[] = {
> > @@ -55,33 +62,65 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> >   	struct pwm_ir *pwm_ir = dev->priv;
> >   	struct pwm_device *pwm = pwm_ir->pwm;
> >   	struct pwm_state state;
> > -	int i;
> > -	ktime_t edge;
> > -	long delta;
> > +
> > +	reinit_completion(&pwm_ir->completion);
> 
> You should not need that.

It does not work without it - the process doing the 2nd tx hangs indefinitely.

> >   	pwm_init_state(pwm, &state);
> >   	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
> >   	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
> > +	state.enabled = false;
> > -	edge = ktime_get();
> > +	pwm_ir->txbuf = txbuf;
> > +	pwm_ir->txbuf_len = count;
> > +	pwm_ir->txbuf_index = 0;
> > -	for (i = 0; i < count; i++) {
> > -		state.enabled = !(i % 2);
> > -		pwm_apply_state(pwm, &state);
> > +	pwm_apply_state(pwm, &state);
> 
> ditto, first pwm control should be in the timer function

This requires keeping a copy of pwm_state in pwm_ir but does avoid the extra
call to pwm_apply_state() here.

Having said that, the extra call to pwm_apply_state() may have benefits,
see this comment in the pwm-sifive driver:

 * - When changing both duty cycle and period, we cannot prevent in
 *   software that the output might produce a period with mixed
 *   settings (new period length and old duty cycle).

So setting the duty cycle and period once with enabled = false prevents a
first period with mixed settings (i.e. bogus).

> > -		edge = ktime_add_us(edge, txbuf[i]);
> > -		delta = ktime_us_delta(edge, ktime_get());
> > -		if (delta > 0)
> > -			usleep_range(delta, delta + 10);
> > -	}
> > +	hrtimer_start(&pwm_ir->timer, 1000, HRTIMER_MODE_REL);
> 
> why not just call it with 0 time?

Otherwise the timings are a little off for the first edge - hrtimer setup
time, I think. I can experiment again.

> > -	state.enabled = false;
> > -	pwm_apply_state(pwm, &state);
> > +	wait_for_completion(&pwm_ir->completion);
> >   	return count;
> >   }
> > +static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
> > +{
> > +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> > +	ktime_t now;
> > +
> > +	/*
> > +	 * If we happen to hit an odd latency spike, loop through the
> > +	 * pulses until we catch up.
> > +	 */
> > +	do {
> > +		u64 ns;
> > +
> > +		if (pwm_ir->txbuf_index >= pwm_ir->txbuf_len) {
> > +			/* Stop TX here */
> > +			pwm_disable(pwm_ir->pwm);
> > +
> > +			complete(&pwm_ir->completion);
> > +
> > +			return HRTIMER_NORESTART;
> > +		}
> > +
> > +		if (pwm_ir->txbuf_index % 2)
> > +			pwm_disable(pwm_ir->pwm);
> > +		else
> > +			pwm_enable(pwm_ir->pwm);
> > +
> 
> pwm_ir->pwm->state.enabled = !(pwm_ir->txbuf_index % 2);
> pwm_apply_state(pwm_ir->pwm, pwm_ir->state);

Requires a copy of pwm_state in pwm_ir, not a huge difference (copy of 28
bytes vs keeping it around).

> > +		ns = US_TO_NS(pwm_ir->txbuf[pwm_ir->txbuf_index]);
> > +		hrtimer_add_expires_ns(timer, ns);
> > +
> > +		pwm_ir->txbuf_index++;
> > +
> > +		now = timer->base->get_time();
> > +	} while (hrtimer_get_expires_tv64(timer) < now);
> > +
> > +	return HRTIMER_RESTART;
> > +}
> > +
> >   static int pwm_ir_probe(struct platform_device *pdev)
> >   {
> >   	struct pwm_ir *pwm_ir;
> > @@ -96,8 +135,16 @@ static int pwm_ir_probe(struct platform_device *pdev)
> >   	if (IS_ERR(pwm_ir->pwm))
> >   		return PTR_ERR(pwm_ir->pwm);
> > +	if (pwm_can_sleep(pwm_ir->pwm)) {
> > +		dev_err(&pdev->dev, "unsupported pwm device: driver can sleep\n");
> > +		return -ENODEV;
> > +	}
> > +
> 
> I think we shall not limit, but use high priority thread to support those
> drivers. I have that working on n900 with current (sleeping) pwm, see my
> reply on the other mail. Maybe we can combine both patches in a way to
> support both atomic and sleeping pwm drivers.

If the ir-rx51 driver uses a sleeping pwm then that's broken and only works
by accident - the current driver is broken then.

Spinning for longer periods (e.g. 100us) does not play well with RT. Would
make more sense to fix the pwm driver to non-sleeping when a pwm driver
is used for pwm-ir-tx?

Thanks

Sean

> 
> >   	pwm_ir->carrier = 38000;
> >   	pwm_ir->duty_cycle = 50;
> > +	init_completion(&pwm_ir->completion);
> > +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> > +	pwm_ir->timer.function = pwm_ir_timer;
> >   	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
> >   	if (!rcdev)
> > 
> 
> Regards,
> Ivo
Uwe Kleine-König Oct. 2, 2023, 8:59 a.m. UTC | #4
Hello Sean,

On Mon, Oct 02, 2023 at 09:20:20AM +0100, Sean Young wrote:
> Having said that, the extra call to pwm_apply_state() may have benefits,
> see this comment in the pwm-sifive driver:
> 
>  * - When changing both duty cycle and period, we cannot prevent in
>  *   software that the output might produce a period with mixed
>  *   settings (new period length and old duty cycle).

You don't gain anything here I think. Disabling a PWM might also result
in a constant active level. If you want to prevent this, your best bet
is to never disable the PWM.

Best regards
Uwe
Ivaylo Dimitrov Oct. 2, 2023, 9:52 a.m. UTC | #5
On 2.10.23 г. 11:20 ч., Sean Young wrote:
> Hi,
> 
> On Mon, Oct 02, 2023 at 08:49:47AM +0300, Ivaylo Dimitrov wrote:
>> On 1.10.23 г. 13:40 ч., Sean Young wrote:
>>> The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
>>> from delays as this is done in process context. Make this work in atomic
>>> context.
>>>
>>> This makes the driver much more precise.
>>>
>>> Signed-off-by: Sean Young <sean@mess.org>
>>> Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
>>> ---
>>>    drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
>>>    1 file changed, 63 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
>>> index c5f37c03af9c..557725a07a67 100644
>>> --- a/drivers/media/rc/pwm-ir-tx.c
>>> +++ b/drivers/media/rc/pwm-ir-tx.c
>>> @@ -10,6 +10,8 @@
>>>    #include <linux/slab.h>
>>>    #include <linux/of.h>
>>>    #include <linux/platform_device.h>
>>> +#include <linux/hrtimer.h>
>>> +#include <linux/completion.h>
>>>    #include <media/rc-core.h>
>>>    #define DRIVER_NAME	"pwm-ir-tx"
>>> @@ -17,8 +19,13 @@
>>>    struct pwm_ir {
>>>    	struct pwm_device *pwm;
>>> -	unsigned int carrier;
>>> -	unsigned int duty_cycle;
>>> +	struct hrtimer timer;
>>> +	struct completion completion;
>>> +	uint carrier;
>>> +	uint duty_cycle;
>>> +	uint *txbuf;
>>> +	uint txbuf_len;
>>> +	uint txbuf_index;
>>>    };
>>>    static const struct of_device_id pwm_ir_of_match[] = {
>>> @@ -55,33 +62,65 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>>>    	struct pwm_ir *pwm_ir = dev->priv;
>>>    	struct pwm_device *pwm = pwm_ir->pwm;
>>>    	struct pwm_state state;
>>> -	int i;
>>> -	ktime_t edge;
>>> -	long delta;
>>> +
>>> +	reinit_completion(&pwm_ir->completion);
>>
>> You should not need that.
> 
> It does not work without it - the process doing the 2nd tx hangs indefinitely.
> 

that means your calls to wait_for_completion() / complete() do not 
match. I think you should check why.

>>>    	pwm_init_state(pwm, &state);
>>>    	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
>>>    	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
>>> +	state.enabled = false;
>>> -	edge = ktime_get();
>>> +	pwm_ir->txbuf = txbuf;
>>> +	pwm_ir->txbuf_len = count;
>>> +	pwm_ir->txbuf_index = 0;
>>> -	for (i = 0; i < count; i++) {
>>> -		state.enabled = !(i % 2);
>>> -		pwm_apply_state(pwm, &state);
>>> +	pwm_apply_state(pwm, &state);
>>
>> ditto, first pwm control should be in the timer function
> 
> This requires keeping a copy of pwm_state in pwm_ir but does avoid the extra
> call to pwm_apply_state() here.
> 

not really, you can have pwm_state * pwm_ir member and pass pointer to 
the stack variable.

> Having said that, the extra call to pwm_apply_state() may have benefits,
> see this comment in the pwm-sifive driver:
> 
>   * - When changing both duty cycle and period, we cannot prevent in
>   *   software that the output might produce a period with mixed
>   *   settings (new period length and old duty cycle).
> 
> So setting the duty cycle and period once with enabled = false prevents a
> first period with mixed settings (i.e. bogus).
> 

Who will enable pwm if not in tx? Like, doesn't the driver have 
exclusive ownership of the pwm? Also, every transmission ends up wit pwm 
disabled, so disabling it once again does not make sense to me.


>>> -		edge = ktime_add_us(edge, txbuf[i]);
>>> -		delta = ktime_us_delta(edge, ktime_get());
>>> -		if (delta > 0)
>>> -			usleep_range(delta, delta + 10);
>>> -	}
>>> +	hrtimer_start(&pwm_ir->timer, 1000, HRTIMER_MODE_REL);
>>
>> why not just call it with 0 time?
> 
> Otherwise the timings are a little off for the first edge - hrtimer setup
> time, I think. I can experiment again.
> 

Why is that? Edge start is controlled by the calls in timer function, it 
should not matter when it is called for the first time.

>>> -	state.enabled = false;
>>> -	pwm_apply_state(pwm, &state);
>>> +	wait_for_completion(&pwm_ir->completion);
>>>    	return count;
>>>    }
>>> +static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
>>> +{
>>> +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
>>> +	ktime_t now;
>>> +
>>> +	/*
>>> +	 * If we happen to hit an odd latency spike, loop through the
>>> +	 * pulses until we catch up.
>>> +	 */
>>> +	do {
>>> +		u64 ns;
>>> +
>>> +		if (pwm_ir->txbuf_index >= pwm_ir->txbuf_len) {
>>> +			/* Stop TX here */
>>> +			pwm_disable(pwm_ir->pwm);
>>> +
>>> +			complete(&pwm_ir->completion);
>>> +
>>> +			return HRTIMER_NORESTART;
>>> +		}
>>> +
>>> +		if (pwm_ir->txbuf_index % 2)
>>> +			pwm_disable(pwm_ir->pwm);
>>> +		else
>>> +			pwm_enable(pwm_ir->pwm);
>>> +
>>
>> pwm_ir->pwm->state.enabled = !(pwm_ir->txbuf_index % 2);
>> pwm_apply_state(pwm_ir->pwm, pwm_ir->state);
> 
> Requires a copy of pwm_state in pwm_ir, not a huge difference (copy of 28
> bytes vs keeping it around).

see my previous comment re struct var. Also, look at the overhead: 
https://elixir.bootlin.com/linux/v6.6-rc3/source/include/linux/pwm.h#L349 
- you call pwm_get_state() for every edge.

> 
>>> +		ns = US_TO_NS(pwm_ir->txbuf[pwm_ir->txbuf_index]);
>>> +		hrtimer_add_expires_ns(timer, ns);
>>> +
>>> +		pwm_ir->txbuf_index++;
>>> +
>>> +		now = timer->base->get_time();
>>> +	} while (hrtimer_get_expires_tv64(timer) < now);
>>> +
>>> +	return HRTIMER_RESTART;
>>> +}
>>> +
>>>    static int pwm_ir_probe(struct platform_device *pdev)
>>>    {
>>>    	struct pwm_ir *pwm_ir;
>>> @@ -96,8 +135,16 @@ static int pwm_ir_probe(struct platform_device *pdev)
>>>    	if (IS_ERR(pwm_ir->pwm))
>>>    		return PTR_ERR(pwm_ir->pwm);
>>> +	if (pwm_can_sleep(pwm_ir->pwm)) {
>>> +		dev_err(&pdev->dev, "unsupported pwm device: driver can sleep\n");
>>> +		return -ENODEV;
>>> +	}
>>> +
>>
>> I think we shall not limit, but use high priority thread to support those
>> drivers. I have that working on n900 with current (sleeping) pwm, see my
>> reply on the other mail. Maybe we can combine both patches in a way to
>> support both atomic and sleeping pwm drivers.
> 
> If the ir-rx51 driver uses a sleeping pwm then that's broken and only works
> by accident - the current driver is broken then.
> 

Yes, and I stated that couple of times in my previous emails :)

> Spinning for longer periods (e.g. 100us) does not play well with RT. Would
> make more sense to fix the pwm driver to non-sleeping when a pwm driver
> is used for pwm-ir-tx?
> 

Sure, and I have a patch for n900 that does this, however, for your i2c 
case there is no solution. Also, we may play smart and dynamically 
decrease sleep time (by adjusting edge by lets say 5-10 us every pulse 
until we have some sane value) if we see it is too long. No strong 
preferences here, it is just that I have code that works.

Thanks,
Ivo

> Thanks
> 
> Sean
> 
>>
>>>    	pwm_ir->carrier = 38000;
>>>    	pwm_ir->duty_cycle = 50;
>>> +	init_completion(&pwm_ir->completion);
>>> +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>> +	pwm_ir->timer.function = pwm_ir_timer;
>>>    	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
>>>    	if (!rcdev)
>>>
>>
>> Regards,
>> Ivo
Sean Young Oct. 4, 2023, 7:43 a.m. UTC | #6
Hi,

On Mon, Oct 02, 2023 at 12:52:00PM +0300, Ivaylo Dimitrov wrote:
> On 2.10.23 г. 11:20 ч., Sean Young wrote:
> > On Mon, Oct 02, 2023 at 08:49:47AM +0300, Ivaylo Dimitrov wrote:
> > > On 1.10.23 г. 13:40 ч., Sean Young wrote:
> > > > The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> > > > from delays as this is done in process context. Make this work in atomic
> > > > context.
> > > > 
> > > > This makes the driver much more precise.
> > > > 
> > > > Signed-off-by: Sean Young <sean@mess.org>
> > > > Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> > > > ---
> > > >    drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
> > > >    1 file changed, 63 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> > > > index c5f37c03af9c..557725a07a67 100644
> > > > --- a/drivers/media/rc/pwm-ir-tx.c
> > > > +++ b/drivers/media/rc/pwm-ir-tx.c
> > > > @@ -10,6 +10,8 @@
> > > >    #include <linux/slab.h>
> > > >    #include <linux/of.h>
> > > >    #include <linux/platform_device.h>
> > > > +#include <linux/hrtimer.h>
> > > > +#include <linux/completion.h>
> > > >    #include <media/rc-core.h>
> > > >    #define DRIVER_NAME	"pwm-ir-tx"
> > > > @@ -17,8 +19,13 @@
> > > >    struct pwm_ir {
> > > >    	struct pwm_device *pwm;
> > > > -	unsigned int carrier;
> > > > -	unsigned int duty_cycle;
> > > > +	struct hrtimer timer;
> > > > +	struct completion completion;
> > > > +	uint carrier;
> > > > +	uint duty_cycle;
> > > > +	uint *txbuf;
> > > > +	uint txbuf_len;
> > > > +	uint txbuf_index;
> > > >    };
> > > >    static const struct of_device_id pwm_ir_of_match[] = {
> > > > @@ -55,33 +62,65 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> > > >    	struct pwm_ir *pwm_ir = dev->priv;
> > > >    	struct pwm_device *pwm = pwm_ir->pwm;
> > > >    	struct pwm_state state;
> > > > -	int i;
> > > > -	ktime_t edge;
> > > > -	long delta;
> > > > +
> > > > +	reinit_completion(&pwm_ir->completion);
> > > 
> > > You should not need that.
> > 
> > It does not work without it - the process doing the 2nd tx hangs indefinitely.
> > 
> 
> that means your calls to wait_for_completion() / complete() do not match. I
> think you should check why.

I've had a deeper look and you're right. Thanks!

> > > >    	pwm_init_state(pwm, &state);
> > > >    	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
> > > >    	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
> > > > +	state.enabled = false;
> > > > -	edge = ktime_get();
> > > > +	pwm_ir->txbuf = txbuf;
> > > > +	pwm_ir->txbuf_len = count;
> > > > +	pwm_ir->txbuf_index = 0;
> > > > -	for (i = 0; i < count; i++) {
> > > > -		state.enabled = !(i % 2);
> > > > -		pwm_apply_state(pwm, &state);
> > > > +	pwm_apply_state(pwm, &state);
> > > 
> > > ditto, first pwm control should be in the timer function
> > 
> > This requires keeping a copy of pwm_state in pwm_ir but does avoid the extra
> > call to pwm_apply_state() here.
> > 
> 
> not really, you can have pwm_state * pwm_ir member and pass pointer to the
> stack variable.

Ah, good point, I had not thought of that :)

> > Having said that, the extra call to pwm_apply_state() may have benefits,
> > see this comment in the pwm-sifive driver:
> > 
> >   * - When changing both duty cycle and period, we cannot prevent in
> >   *   software that the output might produce a period with mixed
> >   *   settings (new period length and old duty cycle).
> > 
> > So setting the duty cycle and period once with enabled = false prevents a
> > first period with mixed settings (i.e. bogus).
> > 
> 
> Who will enable pwm if not in tx? Like, doesn't the driver have exclusive
> ownership of the pwm? Also, every transmission ends up wit pwm disabled, so
> disabling it once again does not make sense to me.

My only point was that if the period/duty cycle have changed, then the extra
disable may set up the parameters in the pwm hardware correctly (according
to this comment). Uwe has already responded saying this is not going to work,
so this can be ignored.

> > > > -		edge = ktime_add_us(edge, txbuf[i]);
> > > > -		delta = ktime_us_delta(edge, ktime_get());
> > > > -		if (delta > 0)
> > > > -			usleep_range(delta, delta + 10);
> > > > -	}
> > > > +	hrtimer_start(&pwm_ir->timer, 1000, HRTIMER_MODE_REL);
> > > 
> > > why not just call it with 0 time?
> > 
> > Otherwise the timings are a little off for the first edge - hrtimer setup
> > time, I think. I can experiment again.
> > 
> 
> Why is that? Edge start is controlled by the calls in timer function, it
> should not matter when it is called for the first time.

Again, you're right.

> > > > -	state.enabled = false;
> > > > -	pwm_apply_state(pwm, &state);
> > > > +	wait_for_completion(&pwm_ir->completion);
> > > >    	return count;
> > > >    }
> > > > +static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
> > > > +{
> > > > +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> > > > +	ktime_t now;
> > > > +
> > > > +	/*
> > > > +	 * If we happen to hit an odd latency spike, loop through the
> > > > +	 * pulses until we catch up.
> > > > +	 */
> > > > +	do {
> > > > +		u64 ns;
> > > > +
> > > > +		if (pwm_ir->txbuf_index >= pwm_ir->txbuf_len) {
> > > > +			/* Stop TX here */
> > > > +			pwm_disable(pwm_ir->pwm);
> > > > +
> > > > +			complete(&pwm_ir->completion);
> > > > +
> > > > +			return HRTIMER_NORESTART;
> > > > +		}
> > > > +
> > > > +		if (pwm_ir->txbuf_index % 2)
> > > > +			pwm_disable(pwm_ir->pwm);
> > > > +		else
> > > > +			pwm_enable(pwm_ir->pwm);
> > > > +
> > > 
> > > pwm_ir->pwm->state.enabled = !(pwm_ir->txbuf_index % 2);
> > > pwm_apply_state(pwm_ir->pwm, pwm_ir->state);
> > 
> > Requires a copy of pwm_state in pwm_ir, not a huge difference (copy of 28
> > bytes vs keeping it around).
> 
> see my previous comment re struct var. Also, look at the overhead:
> https://elixir.bootlin.com/linux/v6.6-rc3/source/include/linux/pwm.h#L349 -
> you call pwm_get_state() for every edge.

That's the 28 bytes copy I was talking about.

However keeping a pointer in struct pwm_ir is a good compromise and makes
the rest of the code cleaner.

> > > > +		ns = US_TO_NS(pwm_ir->txbuf[pwm_ir->txbuf_index]);
> > > > +		hrtimer_add_expires_ns(timer, ns);
> > > > +
> > > > +		pwm_ir->txbuf_index++;
> > > > +
> > > > +		now = timer->base->get_time();
> > > > +	} while (hrtimer_get_expires_tv64(timer) < now);
> > > > +
> > > > +	return HRTIMER_RESTART;
> > > > +}
> > > > +
> > > >    static int pwm_ir_probe(struct platform_device *pdev)
> > > >    {
> > > >    	struct pwm_ir *pwm_ir;
> > > > @@ -96,8 +135,16 @@ static int pwm_ir_probe(struct platform_device *pdev)
> > > >    	if (IS_ERR(pwm_ir->pwm))
> > > >    		return PTR_ERR(pwm_ir->pwm);
> > > > +	if (pwm_can_sleep(pwm_ir->pwm)) {
> > > > +		dev_err(&pdev->dev, "unsupported pwm device: driver can sleep\n");
> > > > +		return -ENODEV;
> > > > +	}
> > > > +
> > > 
> > > I think we shall not limit, but use high priority thread to support those
> > > drivers. I have that working on n900 with current (sleeping) pwm, see my
> > > reply on the other mail. Maybe we can combine both patches in a way to
> > > support both atomic and sleeping pwm drivers.
> > 
> > If the ir-rx51 driver uses a sleeping pwm then that's broken and only works
> > by accident - the current driver is broken then.
> > 
> 
> Yes, and I stated that couple of times in my previous emails :)
> 
> > Spinning for longer periods (e.g. 100us) does not play well with RT. Would
> > make more sense to fix the pwm driver to non-sleeping when a pwm driver
> > is used for pwm-ir-tx?
> > 
> 
> Sure, and I have a patch for n900 that does this, however, for your i2c case
> there is no solution. Also, we may play smart and dynamically decrease sleep
> time (by adjusting edge by lets say 5-10 us every pulse until we have some
> sane value) if we see it is too long. No strong preferences here, it is just
> that I have code that works.

So in order to get everything in order for atomic pwm, we need a few patches
to land. Let's try to get your patch merged for the next release cycle, so
that at least tx on the n900 works and there is improved tx for other devices
(although not the most efficient).

Thanks,
Sean
Sean Young Oct. 4, 2023, 8 a.m. UTC | #7
Hi,

On Mon, Oct 02, 2023 at 09:16:53AM +0300, Ivaylo Dimitrov wrote:
> On 1.10.23 г. 13:40 ч., Sean Young wrote:
> > The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> > from delays as this is done in process context. Make this work in atomic
> > context.
> > 
> > This makes the driver much more precise.
> > 
> > Signed-off-by: Sean Young <sean@mess.org>
> > Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> > ---
> >   drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
> >   1 file changed, 63 insertions(+), 16 deletions(-)
> > 
> 
> what about the following patch(not a proper one, just RFC)? It achieves the
> same (if not better) precision (on n900 at least) without using atomic pwm.
> What it does is: create a fifo thread in which we swicth pwm on/off, start
> hrtimer that is used to signal thread when to switch pwm.
> As signal comes earlier than needed(because hrtimer runs async to the
> thread), we do a busy loop wait for the precise time to switch the pwm. At
> least on n900, this busy loop is less than 200 us per switch(worst case,
> usually it is less than 100 us). That way, even if we have some latency
> spike, it is covered by not doing busy loop for that particular pwm switch
> and we keep the precise timing.

I think this is a good idea.

> Maybe we shall merge both patches so fifo thread to be used for sleeping
> pwms and hrtimer for atomic. I can do that and test it here if you think
> that approach makes sense.

Let's try and merge this patch for the next merge window, and worry about
the atomic version after that. We've already queued the ir-rx51 removal
patches to media_stage so it would be nice to have to revert these patches,
and improve pwm-ir-tx for the next kernel release.

I'll test your patch, in the mean time would you mind posting this patch
as a proper patch (with review comments below addressed)?

Thanks,

Sean


> 
> Regards,
> Ivo
> 
> PS: I have a patch that converts timer-ti-dm to non-sleeping one, will send
> it when it comes to it.
> 
> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
> index 105a9c24f1e3..e8b620f53056 100644
> --- a/drivers/media/rc/pwm-ir-tx.c
> +++ b/drivers/media/rc/pwm-ir-tx.c
> @@ -4,6 +4,7 @@
>   */
> 
>  #include <linux/kernel.h>
> +#include <linux/kthread.h>
>  #include <linux/module.h>
>  #include <linux/pwm.h>
>  #include <linux/delay.h>
> @@ -17,8 +18,16 @@
> 
>  struct pwm_ir {
>  	struct pwm_device *pwm;
> +	struct hrtimer timer;
> +	struct task_struct *tx_thread;
> +	wait_queue_head_t tx_wq;
> +	struct completion tx_done;
> +	struct completion edge;
>  	unsigned int carrier;
>  	unsigned int duty_cycle;
> +	unsigned int *txbuf;
> +	unsigned int count;
> +	unsigned int index;
>  };
> 
>  static const struct of_device_id pwm_ir_of_match[] = {
> @@ -48,35 +57,103 @@ static int pwm_ir_set_carrier(struct rc_dev *dev, u32
> carrier)
>  	return 0;
>  }
> 
> -static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> -		     unsigned int count)
> +static enum hrtimer_restart pwm_ir_timer_cb(struct hrtimer *timer)
> +{
> +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
> +	ktime_t now;
> +
> +	/*
> +	 * If we happen to hit an odd latency spike, loop through the
> +	 * pulses until we catch up.
> +	 */
> +	do {
> +		u64 edge;
> +
> +		if (pwm_ir->index >= pwm_ir->count)
> +			goto out;

Might as well avoid the goto and put the complete and return in the body of
the if.

> +
> +		complete(&pwm_ir->edge);
> +
> +		edge = US_TO_NS(pwm_ir->txbuf[pwm_ir->index]);
> +		hrtimer_add_expires_ns(timer, edge);
> +
> +		pwm_ir->index++;
> +
> +		now = timer->base->get_time();
> +
> +	} while (hrtimer_get_expires_tv64(timer) < now);
> +
> +	return HRTIMER_RESTART;
> +out:
> +	complete(&pwm_ir->edge);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +static void _pwm_ir_tx(struct pwm_ir *pwm_ir)
>  {
> -	struct pwm_ir *pwm_ir = dev->priv;
> -	struct pwm_device *pwm = pwm_ir->pwm;
>  	struct pwm_state state;
>  	int i;
>  	ktime_t edge;
>  	long delta;
> 
> -	pwm_init_state(pwm, &state);
> +	pwm_init_state(pwm_ir->pwm, &state);
> 
>  	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
>  	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
> 
> +	hrtimer_start(&pwm_ir->timer, 0, HRTIMER_MODE_REL);
> +	wait_for_completion(&pwm_ir->edge);
>  	edge = ktime_get();
> 
> -	for (i = 0; i < count; i++) {
> +	for (i = 0; i < pwm_ir->count; i++) {
>  		state.enabled = !(i % 2);
> -		pwm_apply_state(pwm, &state);
> +		pwm_apply_state(pwm_ir->pwm, &state);
> +
> +		edge = ktime_add_us(edge, pwm_ir->txbuf[i]);
> +		wait_for_completion(&pwm_ir->edge);
> 
> -		edge = ktime_add_us(edge, txbuf[i]);
>  		delta = ktime_us_delta(edge, ktime_get());
> +
>  		if (delta > 0)
> -			usleep_range(delta, delta + 10);
> +			udelay(delta);
>  	}
> 
>  	state.enabled = false;
> -	pwm_apply_state(pwm, &state);
> +	pwm_apply_state(pwm_ir->pwm, &state);
> +
> +	pwm_ir->count = 0;
> +}
> +
> +static int pwm_ir_thread(void *data)
> +{
> +	struct pwm_ir *pwm_ir = data;
> +
> +	for (;;) {
> +		wait_event_idle(pwm_ir->tx_wq,
> +				kthread_should_stop() || pwm_ir->count);
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		_pwm_ir_tx(pwm_ir);
> +		complete(&pwm_ir->tx_done);
> +	}
> +
> +	return 0;
> +}
> +
> +static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
> +		     unsigned int count)
> +{
> +	struct pwm_ir *pwm_ir = dev->priv;
> +
> +	pwm_ir->txbuf = txbuf;
> +	pwm_ir->count = count;
> +	pwm_ir->index = 0;
> +
> +	wake_up(&pwm_ir->tx_wq);
> +	wait_for_completion(&pwm_ir->tx_done);
> 
>  	return count;
>  }
> @@ -91,12 +168,24 @@ static int pwm_ir_probe(struct platform_device *pdev)
>  	if (!pwm_ir)
>  		return -ENOMEM;
> 
> +	platform_set_drvdata(pdev, pwm_ir);
> +
> +	pwm_ir->count = 0;
> +
>  	pwm_ir->pwm = devm_pwm_get(&pdev->dev, NULL);
>  	if (IS_ERR(pwm_ir->pwm))
>  		return PTR_ERR(pwm_ir->pwm);
> 
> -	pwm_ir->carrier = 38000;
> +	init_waitqueue_head(&pwm_ir->tx_wq);
> +	init_completion(&pwm_ir->edge);
> +	init_completion(&pwm_ir->tx_done);
> +
> +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	pwm_ir->timer.function = pwm_ir_timer_cb;
> +
>  	pwm_ir->duty_cycle = 50;
> +	pwm_ir->carrier = DIV_ROUND_CLOSEST_ULL(pwm_get_period(pwm_ir->pwm),
> +						NSEC_PER_SEC);
> 
>  	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
>  	if (!rcdev)
> @@ -109,15 +198,38 @@ static int pwm_ir_probe(struct platform_device *pdev)
>  	rcdev->s_tx_duty_cycle = pwm_ir_set_duty_cycle;
>  	rcdev->s_tx_carrier = pwm_ir_set_carrier;
> 
> +	pwm_ir->tx_thread = kthread_create(pwm_ir_thread, pwm_ir, "%s/tx",
> +					   dev_name(&pdev->dev));
> +	if (IS_ERR(pwm_ir->tx_thread))
> +		return PTR_ERR(pwm_ir->tx_thread);
> +
> +	sched_set_fifo(pwm_ir->tx_thread);
> +	wake_up_process(pwm_ir->tx_thread);

This means the thread is always around. How about creating the thread 
per-tx?

> +
>  	rc = devm_rc_register_device(&pdev->dev, rcdev);
> -	if (rc < 0)
> +	if (rc < 0) {
> +		kthread_stop(pwm_ir->tx_thread);
>  		dev_err(&pdev->dev, "failed to register rc device\n");
> +	}
> 
>  	return rc;
>  }
> 
> +static int pwm_ir_remove(struct platform_device *pdev)
> +{
> +	struct pwm_ir *pwm_ir = platform_get_drvdata(pdev);
> +
> +	if (pwm_ir->tx_thread) {
> +		kthread_stop(pwm_ir->tx_thread);
> +		pwm_ir->tx_thread = NULL;
> +	}
> +
> +	return 0;
> +}
> +
>  static struct platform_driver pwm_ir_driver = {
>  	.probe = pwm_ir_probe,
> +	.remove = pwm_ir_remove,
>  	.driver = {
>  		.name	= DRIVER_NAME,
>  		.of_match_table = of_match_ptr(pwm_ir_of_match),
Uwe Kleine-König Oct. 4, 2023, 9:35 a.m. UTC | #8
Hello,

On Wed, Oct 04, 2023 at 08:43:53AM +0100, Sean Young wrote:
> On Mon, Oct 02, 2023 at 12:52:00PM +0300, Ivaylo Dimitrov wrote:
> > On 2.10.23 г. 11:20 ч., Sean Young wrote:
> > > Requires a copy of pwm_state in pwm_ir, not a huge difference (copy of 28
> > > bytes vs keeping it around).
> > 
> > see my previous comment re struct var. Also, look at the overhead:
> > https://elixir.bootlin.com/linux/v6.6-rc3/source/include/linux/pwm.h#L349 -
> > you call pwm_get_state() for every edge.
> 
> That's the 28 bytes copy I was talking about.

Note that pwm_get_state() also has (IMHO) confusing semantics. It gives
you (most of the time) the state that was last pwm_state_apply()d and
not the state the hardware is currently in. In my book keeping the
pwm_state around is the nicer approach that often is also simpler ...

> However keeping a pointer in struct pwm_ir is a good compromise and makes
> the rest of the code cleaner.

... which seems to apply here, too.

Best regards
Uwe
Ivaylo Dimitrov Oct. 4, 2023, 12:54 p.m. UTC | #9
Hi,

On 4.10.23 г. 11:00 ч., Sean Young wrote:
> Hi,
> 
> On Mon, Oct 02, 2023 at 09:16:53AM +0300, Ivaylo Dimitrov wrote:
>> On 1.10.23 г. 13:40 ч., Sean Young wrote:
>>> The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
>>> from delays as this is done in process context. Make this work in atomic
>>> context.
>>>
>>> This makes the driver much more precise.
>>>
>>> Signed-off-by: Sean Young <sean@mess.org>
>>> Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
>>> ---
>>>    drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
>>>    1 file changed, 63 insertions(+), 16 deletions(-)
>>>
>>
>> what about the following patch(not a proper one, just RFC)? It achieves the
>> same (if not better) precision (on n900 at least) without using atomic pwm.
>> What it does is: create a fifo thread in which we swicth pwm on/off, start
>> hrtimer that is used to signal thread when to switch pwm.
>> As signal comes earlier than needed(because hrtimer runs async to the
>> thread), we do a busy loop wait for the precise time to switch the pwm. At
>> least on n900, this busy loop is less than 200 us per switch(worst case,
>> usually it is less than 100 us). That way, even if we have some latency
>> spike, it is covered by not doing busy loop for that particular pwm switch
>> and we keep the precise timing.
> 
> I think this is a good idea.
> 
>> Maybe we shall merge both patches so fifo thread to be used for sleeping
>> pwms and hrtimer for atomic. I can do that and test it here if you think
>> that approach makes sense.
> 
> Let's try and merge this patch for the next merge window, and worry about
> the atomic version after that. We've already queued the ir-rx51 removal
> patches to media_stage so it would be nice to have to revert these patches,
> and improve pwm-ir-tx for the next kernel release.
> 

ir-rx51 is broken without 
https://www.spinics.net/lists/kernel/msg4953300.html, it is also missing 
a call to init_waitqueue_head() in the probe() function. So I have no 
strong opinion on what shall be done with it.

> I'll test your patch, in the mean time would you mind posting this patch
> as a proper patch (with review comments below addressed)?
> 

ok

> Thanks,
> 
> Sean
> 
> 
>>
>> Regards,
>> Ivo
>>
>> PS: I have a patch that converts timer-ti-dm to non-sleeping one, will send
>> it when it comes to it.
>>
>> diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
>> index 105a9c24f1e3..e8b620f53056 100644
>> --- a/drivers/media/rc/pwm-ir-tx.c
>> +++ b/drivers/media/rc/pwm-ir-tx.c
>> @@ -4,6 +4,7 @@
>>    */
>>
>>   #include <linux/kernel.h>
>> +#include <linux/kthread.h>
>>   #include <linux/module.h>
>>   #include <linux/pwm.h>
>>   #include <linux/delay.h>
>> @@ -17,8 +18,16 @@
>>
>>   struct pwm_ir {
>>   	struct pwm_device *pwm;
>> +	struct hrtimer timer;
>> +	struct task_struct *tx_thread;
>> +	wait_queue_head_t tx_wq;
>> +	struct completion tx_done;
>> +	struct completion edge;
>>   	unsigned int carrier;
>>   	unsigned int duty_cycle;
>> +	unsigned int *txbuf;
>> +	unsigned int count;
>> +	unsigned int index;
>>   };
>>
>>   static const struct of_device_id pwm_ir_of_match[] = {
>> @@ -48,35 +57,103 @@ static int pwm_ir_set_carrier(struct rc_dev *dev, u32
>> carrier)
>>   	return 0;
>>   }
>>
>> -static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>> -		     unsigned int count)
>> +static enum hrtimer_restart pwm_ir_timer_cb(struct hrtimer *timer)
>> +{
>> +	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
>> +	ktime_t now;
>> +
>> +	/*
>> +	 * If we happen to hit an odd latency spike, loop through the
>> +	 * pulses until we catch up.
>> +	 */
>> +	do {
>> +		u64 edge;
>> +
>> +		if (pwm_ir->index >= pwm_ir->count)
>> +			goto out;
> 
> Might as well avoid the goto and put the complete and return in the body of
> the if.
> 

right, will fix

>> +
>> +		complete(&pwm_ir->edge);
>> +
>> +		edge = US_TO_NS(pwm_ir->txbuf[pwm_ir->index]);
>> +		hrtimer_add_expires_ns(timer, edge);
>> +
>> +		pwm_ir->index++;
>> +
>> +		now = timer->base->get_time();
>> +
>> +	} while (hrtimer_get_expires_tv64(timer) < now);
>> +
>> +	return HRTIMER_RESTART;
>> +out:
>> +	complete(&pwm_ir->edge);
>> +
>> +	return HRTIMER_NORESTART;
>> +}
>> +
>> +static void _pwm_ir_tx(struct pwm_ir *pwm_ir)
>>   {
>> -	struct pwm_ir *pwm_ir = dev->priv;
>> -	struct pwm_device *pwm = pwm_ir->pwm;
>>   	struct pwm_state state;
>>   	int i;
>>   	ktime_t edge;
>>   	long delta;
>>
>> -	pwm_init_state(pwm, &state);
>> +	pwm_init_state(pwm_ir->pwm, &state);
>>
>>   	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
>>   	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
>>
>> +	hrtimer_start(&pwm_ir->timer, 0, HRTIMER_MODE_REL);
>> +	wait_for_completion(&pwm_ir->edge);
>>   	edge = ktime_get();
>>
>> -	for (i = 0; i < count; i++) {
>> +	for (i = 0; i < pwm_ir->count; i++) {
>>   		state.enabled = !(i % 2);
>> -		pwm_apply_state(pwm, &state);
>> +		pwm_apply_state(pwm_ir->pwm, &state);
>> +
>> +		edge = ktime_add_us(edge, pwm_ir->txbuf[i]);
>> +		wait_for_completion(&pwm_ir->edge);
>>
>> -		edge = ktime_add_us(edge, txbuf[i]);
>>   		delta = ktime_us_delta(edge, ktime_get());
>> +
>>   		if (delta > 0)
>> -			usleep_range(delta, delta + 10);
>> +			udelay(delta);
>>   	}
>>
>>   	state.enabled = false;
>> -	pwm_apply_state(pwm, &state);
>> +	pwm_apply_state(pwm_ir->pwm, &state);
>> +
>> +	pwm_ir->count = 0;
>> +}
>> +
>> +static int pwm_ir_thread(void *data)
>> +{
>> +	struct pwm_ir *pwm_ir = data;
>> +
>> +	for (;;) {
>> +		wait_event_idle(pwm_ir->tx_wq,
>> +				kthread_should_stop() || pwm_ir->count);
>> +
>> +		if (kthread_should_stop())
>> +			break;
>> +
>> +		_pwm_ir_tx(pwm_ir);
>> +		complete(&pwm_ir->tx_done);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
>> +		     unsigned int count)
>> +{
>> +	struct pwm_ir *pwm_ir = dev->priv;
>> +
>> +	pwm_ir->txbuf = txbuf;
>> +	pwm_ir->count = count;
>> +	pwm_ir->index = 0;
>> +
>> +	wake_up(&pwm_ir->tx_wq);
>> +	wait_for_completion(&pwm_ir->tx_done);
>>
>>   	return count;
>>   }
>> @@ -91,12 +168,24 @@ static int pwm_ir_probe(struct platform_device *pdev)
>>   	if (!pwm_ir)
>>   		return -ENOMEM;
>>
>> +	platform_set_drvdata(pdev, pwm_ir);
>> +
>> +	pwm_ir->count = 0;
>> +
>>   	pwm_ir->pwm = devm_pwm_get(&pdev->dev, NULL);
>>   	if (IS_ERR(pwm_ir->pwm))
>>   		return PTR_ERR(pwm_ir->pwm);
>>
>> -	pwm_ir->carrier = 38000;
>> +	init_waitqueue_head(&pwm_ir->tx_wq);
>> +	init_completion(&pwm_ir->edge);
>> +	init_completion(&pwm_ir->tx_done);
>> +
>> +	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +	pwm_ir->timer.function = pwm_ir_timer_cb;
>> +
>>   	pwm_ir->duty_cycle = 50;
>> +	pwm_ir->carrier = DIV_ROUND_CLOSEST_ULL(pwm_get_period(pwm_ir->pwm),
>> +						NSEC_PER_SEC);
>>
>>   	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
>>   	if (!rcdev)
>> @@ -109,15 +198,38 @@ static int pwm_ir_probe(struct platform_device *pdev)
>>   	rcdev->s_tx_duty_cycle = pwm_ir_set_duty_cycle;
>>   	rcdev->s_tx_carrier = pwm_ir_set_carrier;
>>
>> +	pwm_ir->tx_thread = kthread_create(pwm_ir_thread, pwm_ir, "%s/tx",
>> +					   dev_name(&pdev->dev));
>> +	if (IS_ERR(pwm_ir->tx_thread))
>> +		return PTR_ERR(pwm_ir->tx_thread);
>> +
>> +	sched_set_fifo(pwm_ir->tx_thread);
>> +	wake_up_process(pwm_ir->tx_thread);
> 
> This means the thread is always around. How about creating the thread
> per-tx?
> 

Yes, that can be done, just not sure what the overhead would be.

Also, I think we shall reconsider the way the driver works:

Imagine we have to pretend we are TV remote that supports NEC protocol 
(for example), especially the "REPEAT CODES" part. Currently, no matter 
what we do, there is no way to get the timings even remotely right, as 
we have no idea what the "warmup" and "complete" delays are. Like, 
starting thread (if needed), hrtimer setup time, completions waiting, 
contexts switching, etc.

So, I think the correct thing to do is to copy txbuf (as a list of 
txbufs) into pwm_ir in tx function, start pulses generation and return 
from pwm_ir_tx() *immediately*, without waiting for tx to finish. If 
userspace requests submission of another set of pulses while we are 
still sending the current one, well, we accept it, add it to the list 
and delay the sending until the current one is finished. When there is 
nothing more to send (the list is empty), stop the hrtimer (and perhaps 
the thread)

I think that way userspace will be able to append as many "repeat" 
pulses with proper timings as it wants (with some sane limits ofc).

Unless we somehow have API restriction that we shall not return until tx 
is finished.

Does that make any sense to you?

Thanks,
Ivo

>> +
>>   	rc = devm_rc_register_device(&pdev->dev, rcdev);
>> -	if (rc < 0)
>> +	if (rc < 0) {
>> +		kthread_stop(pwm_ir->tx_thread);
>>   		dev_err(&pdev->dev, "failed to register rc device\n");
>> +	}
>>
>>   	return rc;
>>   }
>>
>> +static int pwm_ir_remove(struct platform_device *pdev)
>> +{
>> +	struct pwm_ir *pwm_ir = platform_get_drvdata(pdev);
>> +
>> +	if (pwm_ir->tx_thread) {
>> +		kthread_stop(pwm_ir->tx_thread);
>> +		pwm_ir->tx_thread = NULL;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   static struct platform_driver pwm_ir_driver = {
>>   	.probe = pwm_ir_probe,
>> +	.remove = pwm_ir_remove,
>>   	.driver = {
>>   		.name	= DRIVER_NAME,
>>   		.of_match_table = of_match_ptr(pwm_ir_of_match),
Sean Young Oct. 4, 2023, 2:42 p.m. UTC | #10
On Wed, Oct 04, 2023 at 03:54:32PM +0300, Ivaylo Dimitrov wrote:
> On 4.10.23 г. 11:00 ч., Sean Young wrote:
> > On Mon, Oct 02, 2023 at 09:16:53AM +0300, Ivaylo Dimitrov wrote:
> > > On 1.10.23 г. 13:40 ч., Sean Young wrote:
> > > > The pwm-ir-tx driver has to turn the pwm signal on and off, and suffers
> > > > from delays as this is done in process context. Make this work in atomic
> > > > context.
> > > > 
> > > > This makes the driver much more precise.
> > > > 
> > > > Signed-off-by: Sean Young <sean@mess.org>
> > > > Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
> > > > ---
> > > >    drivers/media/rc/pwm-ir-tx.c | 79 ++++++++++++++++++++++++++++--------
> > > >    1 file changed, 63 insertions(+), 16 deletions(-)
> > > > 
> > > 
> > > what about the following patch(not a proper one, just RFC)? It achieves the
> > > same (if not better) precision (on n900 at least) without using atomic pwm.
> > > What it does is: create a fifo thread in which we swicth pwm on/off, start
> > > hrtimer that is used to signal thread when to switch pwm.
> > > As signal comes earlier than needed(because hrtimer runs async to the
> > > thread), we do a busy loop wait for the precise time to switch the pwm. At
> > > least on n900, this busy loop is less than 200 us per switch(worst case,
> > > usually it is less than 100 us). That way, even if we have some latency
> > > spike, it is covered by not doing busy loop for that particular pwm switch
> > > and we keep the precise timing.
> > 
> > I think this is a good idea.
> > 
> > > Maybe we shall merge both patches so fifo thread to be used for sleeping
> > > pwms and hrtimer for atomic. I can do that and test it here if you think
> > > that approach makes sense.
> > 
> > Let's try and merge this patch for the next merge window, and worry about
> > the atomic version after that. We've already queued the ir-rx51 removal
> > patches to media_stage so it would be nice to have to revert these patches,
> > and improve pwm-ir-tx for the next kernel release.
> > 
> 
> ir-rx51 is broken without
> https://www.spinics.net/lists/kernel/msg4953300.html, it is also missing a
> call to init_waitqueue_head() in the probe() function. So I have no strong
> opinion on what shall be done with it.

Sure, ok. I guess the pwm-ir-tx driver is less broken in that regard.

In that case I propose we merge the ir-rx51 for the next merge window,
and further fixes to pwm-ir-tx go in when they're ready.

> > This means the thread is always around. How about creating the thread
> > per-tx?
> > 
> 
> Yes, that can be done, just not sure what the overhead would be.
> 
> Also, I think we shall reconsider the way the driver works:
> 
> Imagine we have to pretend we are TV remote that supports NEC protocol (for
> example), especially the "REPEAT CODES" part. Currently, no matter what we
> do, there is no way to get the timings even remotely right, as we have no
> idea what the "warmup" and "complete" delays are. Like, starting thread (if
> needed), hrtimer setup time, completions waiting, contexts switching, etc.

It's not perfect, but the assumption is that those times are going to be
the same or very similar for each tx. So, if the setup/warmup time is the same
and if there is no complete delay, then using usleep() between two txs 
works fine. I think in reality the setup/complete times are extremely
short (time to send usb packet or so), and compared to IR timings this is
insignificant.

Having said that, maybe a different scheme would be nice, which could offer
better precision.
 
> So, I think the correct thing to do is to copy txbuf (as a list of txbufs)
> into pwm_ir in tx function, start pulses generation and return from
> pwm_ir_tx() *immediately*, without waiting for tx to finish. If userspace
> requests submission of another set of pulses while we are still sending the
> current one, well, we accept it, add it to the list and delay the sending
> until the current one is finished. When there is nothing more to send (the
> list is empty), stop the hrtimer (and perhaps the thread)
> 
> I think that way userspace will be able to append as many "repeat" pulses
> with proper timings as it wants (with some sane limits ofc).
> 
> Unless we somehow have API restriction that we shall not return until tx is
> finished.
> 
> Does that make any sense to you?

Two problems:

It's a breaking uapi change: for example lircd and ir-ctl use this for 
calculating the gap between transmits. If we start returning early then
things break.

Secondly, not all drivers can support this, or they would need to support
it using a thread or so, which makes the driver code much more complicated
and we'd have to change nearly every driver.


Sean
diff mbox series

Patch

diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
index c5f37c03af9c..557725a07a67 100644
--- a/drivers/media/rc/pwm-ir-tx.c
+++ b/drivers/media/rc/pwm-ir-tx.c
@@ -10,6 +10,8 @@ 
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/hrtimer.h>
+#include <linux/completion.h>
 #include <media/rc-core.h>
 
 #define DRIVER_NAME	"pwm-ir-tx"
@@ -17,8 +19,13 @@ 
 
 struct pwm_ir {
 	struct pwm_device *pwm;
-	unsigned int carrier;
-	unsigned int duty_cycle;
+	struct hrtimer timer;
+	struct completion completion;
+	uint carrier;
+	uint duty_cycle;
+	uint *txbuf;
+	uint txbuf_len;
+	uint txbuf_index;
 };
 
 static const struct of_device_id pwm_ir_of_match[] = {
@@ -55,33 +62,65 @@  static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf,
 	struct pwm_ir *pwm_ir = dev->priv;
 	struct pwm_device *pwm = pwm_ir->pwm;
 	struct pwm_state state;
-	int i;
-	ktime_t edge;
-	long delta;
+
+	reinit_completion(&pwm_ir->completion);
 
 	pwm_init_state(pwm, &state);
 
 	state.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, pwm_ir->carrier);
 	pwm_set_relative_duty_cycle(&state, pwm_ir->duty_cycle, 100);
+	state.enabled = false;
 
-	edge = ktime_get();
+	pwm_ir->txbuf = txbuf;
+	pwm_ir->txbuf_len = count;
+	pwm_ir->txbuf_index = 0;
 
-	for (i = 0; i < count; i++) {
-		state.enabled = !(i % 2);
-		pwm_apply_state(pwm, &state);
+	pwm_apply_state(pwm, &state);
 
-		edge = ktime_add_us(edge, txbuf[i]);
-		delta = ktime_us_delta(edge, ktime_get());
-		if (delta > 0)
-			usleep_range(delta, delta + 10);
-	}
+	hrtimer_start(&pwm_ir->timer, 1000, HRTIMER_MODE_REL);
 
-	state.enabled = false;
-	pwm_apply_state(pwm, &state);
+	wait_for_completion(&pwm_ir->completion);
 
 	return count;
 }
 
+static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
+{
+	struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
+	ktime_t now;
+
+	/*
+	 * If we happen to hit an odd latency spike, loop through the
+	 * pulses until we catch up.
+	 */
+	do {
+		u64 ns;
+
+		if (pwm_ir->txbuf_index >= pwm_ir->txbuf_len) {
+			/* Stop TX here */
+			pwm_disable(pwm_ir->pwm);
+
+			complete(&pwm_ir->completion);
+
+			return HRTIMER_NORESTART;
+		}
+
+		if (pwm_ir->txbuf_index % 2)
+			pwm_disable(pwm_ir->pwm);
+		else
+			pwm_enable(pwm_ir->pwm);
+
+		ns = US_TO_NS(pwm_ir->txbuf[pwm_ir->txbuf_index]);
+		hrtimer_add_expires_ns(timer, ns);
+
+		pwm_ir->txbuf_index++;
+
+		now = timer->base->get_time();
+	} while (hrtimer_get_expires_tv64(timer) < now);
+
+	return HRTIMER_RESTART;
+}
+
 static int pwm_ir_probe(struct platform_device *pdev)
 {
 	struct pwm_ir *pwm_ir;
@@ -96,8 +135,16 @@  static int pwm_ir_probe(struct platform_device *pdev)
 	if (IS_ERR(pwm_ir->pwm))
 		return PTR_ERR(pwm_ir->pwm);
 
+	if (pwm_can_sleep(pwm_ir->pwm)) {
+		dev_err(&pdev->dev, "unsupported pwm device: driver can sleep\n");
+		return -ENODEV;
+	}
+
 	pwm_ir->carrier = 38000;
 	pwm_ir->duty_cycle = 50;
+	init_completion(&pwm_ir->completion);
+	hrtimer_init(&pwm_ir->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pwm_ir->timer.function = pwm_ir_timer;
 
 	rcdev = devm_rc_allocate_device(&pdev->dev, RC_DRIVER_IR_RAW_TX);
 	if (!rcdev)