diff mbox

iommu/arm-smmu-v3: Poll for CMDQ drain completion more effectively

Message ID 1493291587-23488-1-git-send-email-sunil.kovvuri@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sunil Kovvuri April 27, 2017, 11:13 a.m. UTC
From: Sunil Goutham <sgoutham@cavium.com>

Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
completion in SMMUv2 driver. Code changes are done with reference to

8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively

Poll timeout has been increased which addresses issue of 100us timeout not
sufficient, when command queue is full with TLB invalidation commands.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: Geetha <gakula@cavium.com>
---
 drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Sunil Kovvuri May 3, 2017, 1:19 p.m. UTC | #1
On Thu, Apr 27, 2017 at 4:43 PM,  <sunil.kovvuri@gmail.com> wrote:
> From: Sunil Goutham <sgoutham@cavium.com>
>
> Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
> completion in SMMUv2 driver. Code changes are done with reference to
>
> 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
>
> Poll timeout has been increased which addresses issue of 100us timeout not
> sufficient, when command queue is full with TLB invalidation commands.
>
> Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
> Signed-off-by: Geetha <gakula@cavium.com>
> ---
>  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d412bdd..34599d4 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -379,6 +379,9 @@
>  #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
>  #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
>
> +#define CMDQ_DRAIN_TIMEOUT_US          1000
> +#define CMDQ_SPIN_COUNT                        10
> +
>  /* Event queue */
>  #define EVTQ_ENT_DWORDS                        4
>  #define EVTQ_MAX_SZ_SHIFT              7
> @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>   */
>  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>  {
> -       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> +       ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
> +       unsigned int spin_cnt, delay = 1;
>
>         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>                 if (ktime_compare(ktime_get(), timeout) > 0)
> @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>                 if (wfe) {
>                         wfe();
>                 } else {
> -                       cpu_relax();
> -                       udelay(1);
> +                       for (spin_cnt = 0;
> +                            spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
> +                               cpu_relax();
> +                               continue;
> +                       }
> +                       udelay(delay);
> +                       delay *= 2;
>                 }
>         }
>
> --
> 2.7.4
>

Sorry for the ignorance.
Is there a patchwork where I can check current status of ARM IOMMU
related patches ?

And is this patch accepted, if not any comments / feedback ?

Thanks,
Sunil.
Robin Murphy May 3, 2017, 3:33 p.m. UTC | #2
On 27/04/17 12:13, sunil.kovvuri@gmail.com wrote:
> From: Sunil Goutham <sgoutham@cavium.com>
> 
> Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
> completion in SMMUv2 driver. Code changes are done with reference to
> 
> 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
> 
> Poll timeout has been increased which addresses issue of 100us timeout not
> sufficient, when command queue is full with TLB invalidation commands.
> 
> Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
> Signed-off-by: Geetha <gakula@cavium.com>
> ---
>  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d412bdd..34599d4 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -379,6 +379,9 @@
>  #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
>  #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
>  
> +#define CMDQ_DRAIN_TIMEOUT_US		1000
> +#define CMDQ_SPIN_COUNT			10
> +
>  /* Event queue */
>  #define EVTQ_ENT_DWORDS			4
>  #define EVTQ_MAX_SZ_SHIFT		7
> @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>   */
>  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>  {
> -	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> +	ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
> +	unsigned int spin_cnt, delay = 1;
>  
>  	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>  		if (ktime_compare(ktime_get(), timeout) > 0)
> @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>  		if (wfe) {
>  			wfe();
>  		} else {
> -			cpu_relax();
> -			udelay(1);
> +			for (spin_cnt = 0;
> +			     spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
> +				cpu_relax();
> +				continue;
> +			}
> +			udelay(delay);
> +			delay *= 2;

Sorry, I can't make sense of this. The referenced commit uses the spin
loop to poll opportunistically a few times before delaying. This loop
just adds a short open-coded udelay to an exponential udelay, and it's
not really clear that that's any better than a fixed udelay (especially
as the two cases in which we poll are somewhat different).

What's wrong with simply increasing the timeout value alone?

Robin.

>  		}
>  	}
>  
>
Will Deacon May 3, 2017, 3:37 p.m. UTC | #3
On Wed, May 03, 2017 at 06:49:09PM +0530, Sunil Kovvuri wrote:
> On Thu, Apr 27, 2017 at 4:43 PM,  <sunil.kovvuri@gmail.com> wrote:
> > From: Sunil Goutham <sgoutham@cavium.com>
> >
> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
> > completion in SMMUv2 driver. Code changes are done with reference to
> >
> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
> >
> > Poll timeout has been increased which addresses issue of 100us timeout not
> > sufficient, when command queue is full with TLB invalidation commands.
> >
> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
> > Signed-off-by: Geetha <gakula@cavium.com>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index d412bdd..34599d4 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -379,6 +379,9 @@
> >  #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
> >  #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
> >
> > +#define CMDQ_DRAIN_TIMEOUT_US          1000
> > +#define CMDQ_SPIN_COUNT                        10
> > +
> >  /* Event queue */
> >  #define EVTQ_ENT_DWORDS                        4
> >  #define EVTQ_MAX_SZ_SHIFT              7
> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
> >   */
> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >  {
> > -       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> > +       ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
> > +       unsigned int spin_cnt, delay = 1;
> >
> >         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> >                 if (ktime_compare(ktime_get(), timeout) > 0)
> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >                 if (wfe) {
> >                         wfe();
> >                 } else {
> > -                       cpu_relax();
> > -                       udelay(1);
> > +                       for (spin_cnt = 0;
> > +                            spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
> > +                               cpu_relax();
> > +                               continue;
> > +                       }
> > +                       udelay(delay);
> > +                       delay *= 2;
> >                 }
> >         }
> >
> > --
> > 2.7.4
> >
> 
> Sorry for the ignorance.
> Is there a patchwork where I can check current status of ARM IOMMU
> related patches ?
> 
> And is this patch accepted, if not any comments / feedback ?

Please be patient: the merge window is open and it's not been long since you
posted the patch, which looks pretty bonkers at first glance.

Will
Will Deacon May 3, 2017, 3:40 p.m. UTC | #4
On Wed, May 03, 2017 at 04:33:57PM +0100, Robin Murphy wrote:
> On 27/04/17 12:13, sunil.kovvuri@gmail.com wrote:
> > From: Sunil Goutham <sgoutham@cavium.com>
> > 
> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
> > completion in SMMUv2 driver. Code changes are done with reference to
> > 
> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
> > 
> > Poll timeout has been increased which addresses issue of 100us timeout not
> > sufficient, when command queue is full with TLB invalidation commands.
> > 
> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
> > Signed-off-by: Geetha <gakula@cavium.com>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index d412bdd..34599d4 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -379,6 +379,9 @@
> >  #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
> >  #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
> >  
> > +#define CMDQ_DRAIN_TIMEOUT_US		1000
> > +#define CMDQ_SPIN_COUNT			10
> > +
> >  /* Event queue */
> >  #define EVTQ_ENT_DWORDS			4
> >  #define EVTQ_MAX_SZ_SHIFT		7
> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
> >   */
> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >  {
> > -	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> > +	ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
> > +	unsigned int spin_cnt, delay = 1;
> >  
> >  	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> >  		if (ktime_compare(ktime_get(), timeout) > 0)
> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >  		if (wfe) {
> >  			wfe();
> >  		} else {
> > -			cpu_relax();
> > -			udelay(1);
> > +			for (spin_cnt = 0;
> > +			     spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
> > +				cpu_relax();
> > +				continue;
> > +			}
> > +			udelay(delay);
> > +			delay *= 2;
> 
> Sorry, I can't make sense of this. The referenced commit uses the spin
> loop to poll opportunistically a few times before delaying. This loop
> just adds a short open-coded udelay to an exponential udelay, and it's
> not really clear that that's any better than a fixed udelay (especially
> as the two cases in which we poll are somewhat different).
> 
> What's wrong with simply increasing the timeout value alone?

I asked that the timeout is only increased for the drain case, and that
we fix the issue here where we udelat if cons didn't move immediately:

http://lists.infradead.org/pipermail/linux-arm-kernel/2017-April/503389.html

but I don't think the patch above actually achieves any of that.

Will
Sunil Kovvuri May 3, 2017, 3:54 p.m. UTC | #5
On Wed, May 3, 2017 at 9:07 PM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, May 03, 2017 at 06:49:09PM +0530, Sunil Kovvuri wrote:
>> On Thu, Apr 27, 2017 at 4:43 PM,  <sunil.kovvuri@gmail.com> wrote:
>> > From: Sunil Goutham <sgoutham@cavium.com>
>> >
>> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
>> > completion in SMMUv2 driver. Code changes are done with reference to
>> >
>> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
>> >
>> > Poll timeout has been increased which addresses issue of 100us timeout not
>> > sufficient, when command queue is full with TLB invalidation commands.
>> >
>> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
>> > Signed-off-by: Geetha <gakula@cavium.com>
>> > ---
>> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
>> >  1 file changed, 12 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> > index d412bdd..34599d4 100644
>> > --- a/drivers/iommu/arm-smmu-v3.c
>> > +++ b/drivers/iommu/arm-smmu-v3.c
>> > @@ -379,6 +379,9 @@
>> >  #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
>> >  #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
>> >
>> > +#define CMDQ_DRAIN_TIMEOUT_US          1000
>> > +#define CMDQ_SPIN_COUNT                        10
>> > +
>> >  /* Event queue */
>> >  #define EVTQ_ENT_DWORDS                        4
>> >  #define EVTQ_MAX_SZ_SHIFT              7
>> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>> >   */
>> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >  {
>> > -       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>> > +       ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
>> > +       unsigned int spin_cnt, delay = 1;
>> >
>> >         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>> >                 if (ktime_compare(ktime_get(), timeout) > 0)
>> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >                 if (wfe) {
>> >                         wfe();
>> >                 } else {
>> > -                       cpu_relax();
>> > -                       udelay(1);
>> > +                       for (spin_cnt = 0;
>> > +                            spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
>> > +                               cpu_relax();
>> > +                               continue;
>> > +                       }
>> > +                       udelay(delay);
>> > +                       delay *= 2;
>> >                 }
>> >         }
>> >
>> > --
>> > 2.7.4
>> >
>>
>> Sorry for the ignorance.
>> Is there a patchwork where I can check current status of ARM IOMMU
>> related patches ?
>>
>> And is this patch accepted, if not any comments / feedback ?
>
> Please be patient: the merge window is open and it's not been long since you
> posted the patch, which looks pretty bonkers at first glance.
>
> Will

Look at this
https://lkml.org/lkml/2017/4/3/605
The same thing, i pinged after a week and you said you already picked it up.
All I am asking is how do i know the current status, how many days
would normally
be considered being patient ?
Instead of troubling you, is there a patchwork where i can check the status ?

Thanks,
Sunil.
Will Deacon May 3, 2017, 3:59 p.m. UTC | #6
On Wed, May 03, 2017 at 09:24:13PM +0530, Sunil Kovvuri wrote:
> On Wed, May 3, 2017 at 9:07 PM, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, May 03, 2017 at 06:49:09PM +0530, Sunil Kovvuri wrote:
> >> On Thu, Apr 27, 2017 at 4:43 PM,  <sunil.kovvuri@gmail.com> wrote:
> >> > From: Sunil Goutham <sgoutham@cavium.com>
> >> >
> >> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
> >> > completion in SMMUv2 driver. Code changes are done with reference to
> >> >
> >> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
> >> >
> >> > Poll timeout has been increased which addresses issue of 100us timeout not
> >> > sufficient, when command queue is full with TLB invalidation commands.
> >> >
> >> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
> >> > Signed-off-by: Geetha <gakula@cavium.com>
> >> > ---
> >> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
> >> >  1 file changed, 12 insertions(+), 3 deletions(-)
> >> >
> >> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> >> > index d412bdd..34599d4 100644
> >> > --- a/drivers/iommu/arm-smmu-v3.c
> >> > +++ b/drivers/iommu/arm-smmu-v3.c
> >> > @@ -379,6 +379,9 @@
> >> >  #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
> >> >  #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
> >> >
> >> > +#define CMDQ_DRAIN_TIMEOUT_US          1000
> >> > +#define CMDQ_SPIN_COUNT                        10
> >> > +
> >> >  /* Event queue */
> >> >  #define EVTQ_ENT_DWORDS                        4
> >> >  #define EVTQ_MAX_SZ_SHIFT              7
> >> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
> >> >   */
> >> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >> >  {
> >> > -       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> >> > +       ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
> >> > +       unsigned int spin_cnt, delay = 1;
> >> >
> >> >         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> >> >                 if (ktime_compare(ktime_get(), timeout) > 0)
> >> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
> >> >                 if (wfe) {
> >> >                         wfe();
> >> >                 } else {
> >> > -                       cpu_relax();
> >> > -                       udelay(1);
> >> > +                       for (spin_cnt = 0;
> >> > +                            spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
> >> > +                               cpu_relax();
> >> > +                               continue;
> >> > +                       }
> >> > +                       udelay(delay);
> >> > +                       delay *= 2;
> >> >                 }
> >> >         }
> >> >
> >> > --
> >> > 2.7.4
> >> >
> >>
> >> Sorry for the ignorance.
> >> Is there a patchwork where I can check current status of ARM IOMMU
> >> related patches ?
> >>
> >> And is this patch accepted, if not any comments / feedback ?
> >
> > Please be patient: the merge window is open and it's not been long since you
> > posted the patch, which looks pretty bonkers at first glance.
> >
> > Will
> 
> Look at this
> https://lkml.org/lkml/2017/4/3/605
> The same thing, i pinged after a week and you said you already picked it up.
> All I am asking is how do i know the current status, how many days
> would normally
> be considered being patient ?

At least wait until the merge window is over if it's not a fix, or keep an
eye on the relevant branches (see below).

> Instead of troubling you, is there a patchwork where i can check the status ?

No, but I pick patches up on my iommu/devel branch here:

https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/

and at some point they appear on for-joerg/arm-smmu/updates, which I send
to Joerg (who is the iommu maintainer). He then puts them into linux-next
before they get sent for inclusion in mainline during the next merge window.

Will
Sunil Kovvuri May 3, 2017, 4:23 p.m. UTC | #7
On Wed, May 3, 2017 at 9:10 PM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, May 03, 2017 at 04:33:57PM +0100, Robin Murphy wrote:
>> On 27/04/17 12:13, sunil.kovvuri@gmail.com wrote:
>> > From: Sunil Goutham <sgoutham@cavium.com>
>> >
>> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
>> > completion in SMMUv2 driver. Code changes are done with reference to
>> >
>> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
>> >
>> > Poll timeout has been increased which addresses issue of 100us timeout not
>> > sufficient, when command queue is full with TLB invalidation commands.
>> >
>> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
>> > Signed-off-by: Geetha <gakula@cavium.com>
>> > ---
>> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
>> >  1 file changed, 12 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> > index d412bdd..34599d4 100644
>> > --- a/drivers/iommu/arm-smmu-v3.c
>> > +++ b/drivers/iommu/arm-smmu-v3.c
>> > @@ -379,6 +379,9 @@
>> >  #define CMDQ_SYNC_0_CS_NONE                (0UL << CMDQ_SYNC_0_CS_SHIFT)
>> >  #define CMDQ_SYNC_0_CS_SEV         (2UL << CMDQ_SYNC_0_CS_SHIFT)
>> >
>> > +#define CMDQ_DRAIN_TIMEOUT_US              1000
>> > +#define CMDQ_SPIN_COUNT                    10
>> > +
>> >  /* Event queue */
>> >  #define EVTQ_ENT_DWORDS                    4
>> >  #define EVTQ_MAX_SZ_SHIFT          7
>> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>> >   */
>> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >  {
>> > -   ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>> > +   ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
>> > +   unsigned int spin_cnt, delay = 1;
>> >
>> >     while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>> >             if (ktime_compare(ktime_get(), timeout) > 0)
>> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >             if (wfe) {
>> >                     wfe();
>> >             } else {
>> > -                   cpu_relax();
>> > -                   udelay(1);
>> > +                   for (spin_cnt = 0;
>> > +                        spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
>> > +                           cpu_relax();
>> > +                           continue;
>> > +                   }
>> > +                   udelay(delay);
>> > +                   delay *= 2;
>>
>> Sorry, I can't make sense of this. The referenced commit uses the spin
>> loop to poll opportunistically a few times before delaying. This loop
>> just adds a short open-coded udelay to an exponential udelay, and it's
>> not really clear that that's any better than a fixed udelay (especially
>> as the two cases in which we poll are somewhat different).
>>
>> What's wrong with simply increasing the timeout value alone?
>
> I asked that the timeout is only increased for the drain case, and that
> we fix the issue here where we udelat if cons didn't move immediately:
>
> http://lists.infradead.org/pipermail/linux-arm-kernel/2017-April/503389.html
>
> but I don't think the patch above actually achieves any of that.
>
> Will

Sorry, I completely screwed up the spin poll above.
Will resubmit.

Thanks,
Sunil.
Sunil Kovvuri May 3, 2017, 4:24 p.m. UTC | #8
On Wed, May 3, 2017 at 9:29 PM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, May 03, 2017 at 09:24:13PM +0530, Sunil Kovvuri wrote:
>> On Wed, May 3, 2017 at 9:07 PM, Will Deacon <will.deacon@arm.com> wrote:
>> > On Wed, May 03, 2017 at 06:49:09PM +0530, Sunil Kovvuri wrote:
>> >> On Thu, Apr 27, 2017 at 4:43 PM,  <sunil.kovvuri@gmail.com> wrote:
>> >> > From: Sunil Goutham <sgoutham@cavium.com>
>> >> >
>> >> > Modified polling on CMDQ consumer similar to how polling is done for TLB SYNC
>> >> > completion in SMMUv2 driver. Code changes are done with reference to
>> >> >
>> >> > 8513c8930069 iommu/arm-smmu: Poll for TLB sync completion more effectively
>> >> >
>> >> > Poll timeout has been increased which addresses issue of 100us timeout not
>> >> > sufficient, when command queue is full with TLB invalidation commands.
>> >> >
>> >> > Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
>> >> > Signed-off-by: Geetha <gakula@cavium.com>
>> >> > ---
>> >> >  drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++---
>> >> >  1 file changed, 12 insertions(+), 3 deletions(-)
>> >> >
>> >> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> >> > index d412bdd..34599d4 100644
>> >> > --- a/drivers/iommu/arm-smmu-v3.c
>> >> > +++ b/drivers/iommu/arm-smmu-v3.c
>> >> > @@ -379,6 +379,9 @@
>> >> >  #define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
>> >> >  #define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
>> >> >
>> >> > +#define CMDQ_DRAIN_TIMEOUT_US          1000
>> >> > +#define CMDQ_SPIN_COUNT                        10
>> >> > +
>> >> >  /* Event queue */
>> >> >  #define EVTQ_ENT_DWORDS                        4
>> >> >  #define EVTQ_MAX_SZ_SHIFT              7
>> >> > @@ -737,7 +740,8 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>> >> >   */
>> >> >  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >> >  {
>> >> > -       ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>> >> > +       ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
>> >> > +       unsigned int spin_cnt, delay = 1;
>> >> >
>> >> >         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>> >> >                 if (ktime_compare(ktime_get(), timeout) > 0)
>> >> > @@ -746,8 +750,13 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>> >> >                 if (wfe) {
>> >> >                         wfe();
>> >> >                 } else {
>> >> > -                       cpu_relax();
>> >> > -                       udelay(1);
>> >> > +                       for (spin_cnt = 0;
>> >> > +                            spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
>> >> > +                               cpu_relax();
>> >> > +                               continue;
>> >> > +                       }
>> >> > +                       udelay(delay);
>> >> > +                       delay *= 2;
>> >> >                 }
>> >> >         }
>> >> >
>> >> > --
>> >> > 2.7.4
>> >> >
>> >>
>> >> Sorry for the ignorance.
>> >> Is there a patchwork where I can check current status of ARM IOMMU
>> >> related patches ?
>> >>
>> >> And is this patch accepted, if not any comments / feedback ?
>> >
>> > Please be patient: the merge window is open and it's not been long since you
>> > posted the patch, which looks pretty bonkers at first glance.
>> >
>> > Will
>>
>> Look at this
>> https://lkml.org/lkml/2017/4/3/605
>> The same thing, i pinged after a week and you said you already picked it up.
>> All I am asking is how do i know the current status, how many days
>> would normally
>> be considered being patient ?
>
> At least wait until the merge window is over if it's not a fix, or keep an
> eye on the relevant branches (see below).
>
>> Instead of troubling you, is there a patchwork where i can check the status ?
>
> No, but I pick patches up on my iommu/devel branch here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/
>
> and at some point they appear on for-joerg/arm-smmu/updates, which I send
> to Joerg (who is the iommu maintainer). He then puts them into linux-next
> before they get sent for inclusion in mainline during the next merge window.
>
> Will

Thanks for the info.

Sunil.
diff mbox

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d412bdd..34599d4 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -379,6 +379,9 @@ 
 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
 
+#define CMDQ_DRAIN_TIMEOUT_US		1000
+#define CMDQ_SPIN_COUNT			10
+
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
 #define EVTQ_MAX_SZ_SHIFT		7
@@ -737,7 +740,8 @@  static void queue_inc_prod(struct arm_smmu_queue *q)
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
-	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+	ktime_t timeout = ktime_add_us(ktime_get(), CMDQ_DRAIN_TIMEOUT_US);
+	unsigned int spin_cnt, delay = 1;
 
 	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
 		if (ktime_compare(ktime_get(), timeout) > 0)
@@ -746,8 +750,13 @@  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 		if (wfe) {
 			wfe();
 		} else {
-			cpu_relax();
-			udelay(1);
+			for (spin_cnt = 0;
+			     spin_cnt < CMDQ_SPIN_COUNT; spin_cnt++) {
+				cpu_relax();
+				continue;
+			}
+			udelay(delay);
+			delay *= 2;
 		}
 	}