diff mbox

[V4,10/11] xen/arm: io: Use binary search for mmio handler lookup

Message ID 1468513081-12002-2-git-send-email-shankerd@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Shanker Donthineni July 14, 2016, 4:18 p.m. UTC
As the number of I/O handlers increase, the overhead associated with
linear lookup also increases. The system might have maximum of 144
(assuming CONFIG_NR_CPUS=128) mmio handlers. In worst case scenario,
it would require 144 iterations for finding a matching handler. Now
it is time for us to change from linear (complexity O(n)) to a binary
search (complexity O(log n)) for reducing mmio handler lookup overhead.

Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
---
Changes since v3:
  Moved the function bsearch() to common file xen/common/bsearch.c.

Changes since v2:
  Converted mmio lookup code to a critical section.
  Copied the function bsearch() from the Linux kernel.

 xen/arch/arm/io.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

Comments

Julien Grall July 14, 2016, 4:46 p.m. UTC | #1
Hi Shanker,

On 14/07/16 17:18, Shanker Donthineni wrote:
> As the number of I/O handlers increase, the overhead associated with
> linear lookup also increases. The system might have maximum of 144
> (assuming CONFIG_NR_CPUS=128) mmio handlers. In worst case scenario,
> it would require 144 iterations for finding a matching handler. Now
> it is time for us to change from linear (complexity O(n)) to a binary
> search (complexity O(log n) for reducing mmio handler lookup overhead.
>
> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
> ---
> Changes since v3:
>    Moved the function bsearch() to common file xen/common/bsearch.c.
>
> Changes since v2:
>    Converted mmio lookup code to a critical section.
>    Copied the function bsreach() from Linux kernel.
>
>   xen/arch/arm/io.c | 42 +++++++++++++++++++++++++-----------------
>   1 file changed, 25 insertions(+), 17 deletions(-)
>
> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
> index 40330f0..0471ba8 100644
> --- a/xen/arch/arm/io.c
> +++ b/xen/arch/arm/io.c
> @@ -20,6 +20,8 @@
>   #include <xen/lib.h>
>   #include <xen/spinlock.h>
>   #include <xen/sched.h>
> +#include <xen/sort.h>
> +#include <xen/bsearch.h>
>   #include <asm/current.h>
>   #include <asm/mmio.h>
>
> @@ -70,27 +72,29 @@ static int handle_write(const struct mmio_handler *handler, struct vcpu *v,
>                                  handler->priv);
>   }
>
> -static const struct mmio_handler *find_mmio_handler(struct domain *d,
> -                                                    paddr_t gpa)
> +static int cmp_mmio_handler(const void *key, const void *elem)

Would it be worth to mention in a comment that we don't support overlapping?

>   {
> -    const struct mmio_handler *handler;
> -    unsigned int i;
> -    struct vmmio *vmmio = &d->arch.vmmio;
> +    const struct mmio_handler *handler0 = key;
> +    const struct mmio_handler *handler1 = elem;
>
> -    read_lock(&vmmio->lock);
> +    if ( handler0->addr < handler1->addr )
> +        return -1;
>
> -    for ( i = 0; i < vmmio->num_entries; i++ )
> -    {
> -        handler = &vmmio->handlers[i];
> +    if ( handler0->addr > (handler1->addr + handler1->size) )
> +        return 1;
>
> -        if ( (gpa >= handler->addr) &&
> -             (gpa < (handler->addr + handler->size)) )
> -            break;
> -    }
> +    return 0;
> +}
>
> -    if ( i == vmmio->num_entries )
> -        handler = NULL;
> +static const struct mmio_handler *find_mmio_handler(struct vcpu *v, paddr_t gpa)

Why have you changed the prototype of find_mmio_handler?

> +{
> +    struct vmmio *vmmio = &v->domain->arch.vmmio;
> +    struct mmio_handler key = {.addr = gpa};

I know it is not currently the case, but should not we take into account 
the size of the access?

> +    const struct mmio_handler *handler;
>
> +    read_lock(&vmmio->lock);
> +    handler = bsearch(&key, vmmio->handlers, vmmio->num_entries,
> +                      sizeof(*handler), cmp_mmio_handler);
>       read_unlock(&vmmio->lock);
>
>       return handler;
> @@ -99,9 +103,9 @@ static const struct mmio_handler *find_mmio_handler(struct domain *d,
>   int handle_mmio(mmio_info_t *info)
>   {
>       struct vcpu *v = current;
> -    const struct mmio_handler *handler = NULL;
> +    const struct mmio_handler *handler;

Why this change?

>
> -    handler = find_mmio_handler(v->domain, info->gpa);
> +    handler = find_mmio_handler(v, info->gpa);

ditto.

>       if ( !handler )
>           return 0;
>
> @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d,
>
>       vmmio->num_entries++;
>
> +    /* Sort mmio handlers in ascending order based on base address */
> +    sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler),
> +        cmp_mmio_handler, NULL);

The indentation looks wrong here.

> +
>       write_unlock(&vmmio->lock);
>   }
>
>
Shanker Donthineni July 15, 2016, 2:35 a.m. UTC | #2
Hi Julien,

On 07/14/2016 11:46 AM, Julien Grall wrote:
> Hi Shanker,
>
> On 14/07/16 17:18, Shanker Donthineni wrote:
>> As the number of I/O handlers increase, the overhead associated with
>> linear lookup also increases. The system might have maximum of 144
>> (assuming CONFIG_NR_CPUS=128) mmio handlers. In worst case scenario,
>> it would require 144 iterations for finding a matching handler. Now
>> it is time for us to change from linear (complexity O(n)) to a binary
>> search (complexity O(log n) for reducing mmio handler lookup overhead.
>>
>> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
>> ---
>> Changes since v3:
>>    Moved the function bsearch() to common file xen/common/bsearch.c.
>>
>> Changes since v2:
>>    Converted mmio lookup code to a critical section.
>>    Copied the function bsreach() from Linux kernel.
>>
>>   xen/arch/arm/io.c | 42 +++++++++++++++++++++++++-----------------
>>   1 file changed, 25 insertions(+), 17 deletions(-)
>>
>> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
>> index 40330f0..0471ba8 100644
>> --- a/xen/arch/arm/io.c
>> +++ b/xen/arch/arm/io.c
>> @@ -20,6 +20,8 @@
>>   #include <xen/lib.h>
>>   #include <xen/spinlock.h>
>>   #include <xen/sched.h>
>> +#include <xen/sort.h>
>> +#include <xen/bsearch.h>
>>   #include <asm/current.h>
>>   #include <asm/mmio.h>
>>
>> @@ -70,27 +72,29 @@ static int handle_write(const struct mmio_handler 
>> *handler, struct vcpu *v,
>>                                  handler->priv);
>>   }
>>
>> -static const struct mmio_handler *find_mmio_handler(struct domain *d,
>> -                                                    paddr_t gpa)
>> +static int cmp_mmio_handler(const void *key, const void *elem)
>
> Would it be worth to mention in a comment that we don't support 
> overlapping?
>

Sure, I'll do.

>>   {
>> -    const struct mmio_handler *handler;
>> -    unsigned int i;
>> -    struct vmmio *vmmio = &d->arch.vmmio;
>> +    const struct mmio_handler *handler0 = key;
>> +    const struct mmio_handler *handler1 = elem;
>>
>> -    read_lock(&vmmio->lock);
>> +    if ( handler0->addr < handler1->addr )
>> +        return -1;
>>
>> -    for ( i = 0; i < vmmio->num_entries; i++ )
>> -    {
>> -        handler = &vmmio->handlers[i];
>> +    if ( handler0->addr > (handler1->addr + handler1->size) )
>> +        return 1;
>>
>> -        if ( (gpa >= handler->addr) &&
>> -             (gpa < (handler->addr + handler->size)) )
>> -            break;
>> -    }
>> +    return 0;
>> +}
>>
>> -    if ( i == vmmio->num_entries )
>> -        handler = NULL;
>> +static const struct mmio_handler *find_mmio_handler(struct vcpu *v, 
>> paddr_t gpa)
>
> Why have you changed the prototype of find_mmio_handler?
>

As such there is no reason for this change, I'll revert this change in 
next patchset.
>> +{
>> +    struct vmmio *vmmio = &v->domain->arch.vmmio;
>> +    struct mmio_handler key = {.addr = gpa};
>
> I know it is not currently the case, but should not we take into 
> account the size of the access?
>

I agree with you, we definitely need to consider size of the access for 
traps/emulation drivers similar to what Linux KVM code does currently. 
Do you know which emulation drivers are using non aligned accesses?


>> +    const struct mmio_handler *handler;
>>
>> +    read_lock(&vmmio->lock);
>> +    handler = bsearch(&key, vmmio->handlers, vmmio->num_entries,
>> +                      sizeof(*handler), cmp_mmio_handler);
>>       read_unlock(&vmmio->lock);
>>
>>       return handler;
>> @@ -99,9 +103,9 @@ static const struct mmio_handler 
>> *find_mmio_handler(struct domain *d,
>>   int handle_mmio(mmio_info_t *info)
>>   {
>>       struct vcpu *v = current;
>> -    const struct mmio_handler *handler = NULL;
>> +    const struct mmio_handler *handler;
>
> Why this change?
>

No need to initialize this local variable because the following line 
always initializes it. I'll revert this change anyway to avoid confusion.

>>
>> -    handler = find_mmio_handler(v->domain, info->gpa);
>> +    handler = find_mmio_handler(v, info->gpa);
>
> ditto.
>
>>       if ( !handler )
>>           return 0;
>>
>> @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d,
>>
>>       vmmio->num_entries++;
>>
>> +    /* Sort mmio handlers in ascending order based on base address */
>> +    sort(vmmio->handlers, vmmio->num_entries, sizeof(struct 
>> mmio_handler),
>> +        cmp_mmio_handler, NULL);
>
> The indentation looks wrong here.
>

I don't quite understand what you mean. It has 8 spaces, do I need more 
than 8 spaces or something else?
>> +
>>       write_unlock(&vmmio->lock);
>>   }
>>
>>
>
Julien Grall July 15, 2016, 9:52 a.m. UTC | #3
Hello Shanker,

On 15/07/16 03:35, Shanker Donthineni wrote:
> On 07/14/2016 11:46 AM, Julien Grall wrote:

[...]

>>> +{
>>> +    struct vmmio *vmmio = &v->domain->arch.vmmio;
>>> +    struct mmio_handler key = {.addr = gpa};
>>
>> I know it is not currently the case, but should not we take into
>> account the size of the access?
>>
>
> I agree with you, we definitely need to consider size of the access for
> traps/emulation drivers similar to what Linux KVM code does currently.
> Do you know which emulation drivers are using non aligned accesses?

Sorry, I don't understand your question.

>
>
>>> +    const struct mmio_handler *handler;
>>>
>>> +    read_lock(&vmmio->lock);
>>> +    handler = bsearch(&key, vmmio->handlers, vmmio->num_entries,
>>> +                      sizeof(*handler), cmp_mmio_handler);
>>>       read_unlock(&vmmio->lock);
>>>
>>>       return handler;

[...]

>>> @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d,
>>>
>>>       vmmio->num_entries++;
>>>
>>> +    /* Sort mmio handlers in ascending order based on base address */
>>> +    sort(vmmio->handlers, vmmio->num_entries, sizeof(struct
>>> mmio_handler),
>>> +        cmp_mmio_handler, NULL);
>>
>> The indentation looks wrong here.
>>
>
> I don't quite understand what you mean. It has 8 spaces, do I need more
> than 8 spaces or something else?

If the list of arguments is on multiples lines, the lines should be 
aligned to the first arguments. I.e:

      sort(foo, bar,
           fish);

Your editor should already do it.

Regards,
Shanker Donthineni July 15, 2016, 1:18 p.m. UTC | #4
Hi Julien,

Are you okay to insert the below two validation checks before inserting 
mmio handler?

void register_mmio_handler(struct domain *d,
                            const struct mmio_handler_ops *ops,
                            paddr_t addr, paddr_t size, void *priv)
{
     struct vmmio *vmmio = &d->arch.vmmio;
     struct mmio_handler *handler;

     /* Don't allow overlapping regions */
     BUG_ON(find_mmio_handler(d, addr));
     BUG_ON(find_mmio_handler(d, addr + size - 1));

     BUG_ON(vmmio->num_entries >= vmmio->max_num_entries);
diff mbox

Patch

diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
index 40330f0..0471ba8 100644
--- a/xen/arch/arm/io.c
+++ b/xen/arch/arm/io.c
@@ -20,6 +20,8 @@ 
 #include <xen/lib.h>
 #include <xen/spinlock.h>
 #include <xen/sched.h>
+#include <xen/sort.h>
+#include <xen/bsearch.h>
 #include <asm/current.h>
 #include <asm/mmio.h>
 
@@ -70,27 +72,29 @@  static int handle_write(const struct mmio_handler *handler, struct vcpu *v,
                                handler->priv);
 }
 
-static const struct mmio_handler *find_mmio_handler(struct domain *d,
-                                                    paddr_t gpa)
+static int cmp_mmio_handler(const void *key, const void *elem)
 {
-    const struct mmio_handler *handler;
-    unsigned int i;
-    struct vmmio *vmmio = &d->arch.vmmio;
+    const struct mmio_handler *handler0 = key;
+    const struct mmio_handler *handler1 = elem;
 
-    read_lock(&vmmio->lock);
+    if ( handler0->addr < handler1->addr )
+        return -1;
 
-    for ( i = 0; i < vmmio->num_entries; i++ )
-    {
-        handler = &vmmio->handlers[i];
+    if ( handler0->addr > (handler1->addr + handler1->size) )
+        return 1;
 
-        if ( (gpa >= handler->addr) &&
-             (gpa < (handler->addr + handler->size)) )
-            break;
-    }
+    return 0;
+}
 
-    if ( i == vmmio->num_entries )
-        handler = NULL;
+static const struct mmio_handler *find_mmio_handler(struct vcpu *v, paddr_t gpa)
+{
+    struct vmmio *vmmio = &v->domain->arch.vmmio;
+    struct mmio_handler key = {.addr = gpa};
+    const struct mmio_handler *handler;
 
+    read_lock(&vmmio->lock);
+    handler = bsearch(&key, vmmio->handlers, vmmio->num_entries,
+                      sizeof(*handler), cmp_mmio_handler);
     read_unlock(&vmmio->lock);
 
     return handler;
@@ -99,9 +103,9 @@  static const struct mmio_handler *find_mmio_handler(struct domain *d,
 int handle_mmio(mmio_info_t *info)
 {
     struct vcpu *v = current;
-    const struct mmio_handler *handler = NULL;
+    const struct mmio_handler *handler;
 
-    handler = find_mmio_handler(v->domain, info->gpa);
+    handler = find_mmio_handler(v, info->gpa);
     if ( !handler )
         return 0;
 
@@ -131,6 +135,10 @@  void register_mmio_handler(struct domain *d,
 
     vmmio->num_entries++;
 
+    /* Sort mmio handlers in ascending order based on base address */
+    sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler),
+        cmp_mmio_handler, NULL);
+
     write_unlock(&vmmio->lock);
 }