Message ID | 1468513081-12002-2-git-send-email-shankerd@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Shanker, On 14/07/16 17:18, Shanker Donthineni wrote: > As the number of I/O handlers increase, the overhead associated with > linear lookup also increases. The system might have maximum of 144 > (assuming CONFIG_NR_CPUS=128) mmio handlers. In worst case scenario, > it would require 144 iterations for finding a matching handler. Now > it is time for us to change from linear (complexity O(n)) to a binary > search (complexity O(log n) for reducing mmio handler lookup overhead. > > Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> > --- > Changes since v3: > Moved the function bsearch() to common file xen/common/bsearch.c. > > Changes since v2: > Converted mmio lookup code to a critical section. > Copied the function bsreach() from Linux kernel. > > xen/arch/arm/io.c | 42 +++++++++++++++++++++++++----------------- > 1 file changed, 25 insertions(+), 17 deletions(-) > > diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c > index 40330f0..0471ba8 100644 > --- a/xen/arch/arm/io.c > +++ b/xen/arch/arm/io.c > @@ -20,6 +20,8 @@ > #include <xen/lib.h> > #include <xen/spinlock.h> > #include <xen/sched.h> > +#include <xen/sort.h> > +#include <xen/bsearch.h> > #include <asm/current.h> > #include <asm/mmio.h> > > @@ -70,27 +72,29 @@ static int handle_write(const struct mmio_handler *handler, struct vcpu *v, > handler->priv); > } > > -static const struct mmio_handler *find_mmio_handler(struct domain *d, > - paddr_t gpa) > +static int cmp_mmio_handler(const void *key, const void *elem) Would it be worth to mention in a comment that we don't support overlapping? 
> { > - const struct mmio_handler *handler; > - unsigned int i; > - struct vmmio *vmmio = &d->arch.vmmio; > + const struct mmio_handler *handler0 = key; > + const struct mmio_handler *handler1 = elem; > > - read_lock(&vmmio->lock); > + if ( handler0->addr < handler1->addr ) > + return -1; > > - for ( i = 0; i < vmmio->num_entries; i++ ) > - { > - handler = &vmmio->handlers[i]; > + if ( handler0->addr > (handler1->addr + handler1->size) ) > + return 1; > > - if ( (gpa >= handler->addr) && > - (gpa < (handler->addr + handler->size)) ) > - break; > - } > + return 0; > +} > > - if ( i == vmmio->num_entries ) > - handler = NULL; > +static const struct mmio_handler *find_mmio_handler(struct vcpu *v, paddr_t gpa) Why have you changed the prototype of find_mmio_handler? > +{ > + struct vmmio *vmmio = &v->domain->arch.vmmio; > + struct mmio_handler key = {.addr = gpa}; I know it is not currently the case, but should not we take into account the size of the access? > + const struct mmio_handler *handler; > > + read_lock(&vmmio->lock); > + handler = bsearch(&key, vmmio->handlers, vmmio->num_entries, > + sizeof(*handler), cmp_mmio_handler); > read_unlock(&vmmio->lock); > > return handler; > @@ -99,9 +103,9 @@ static const struct mmio_handler *find_mmio_handler(struct domain *d, > int handle_mmio(mmio_info_t *info) > { > struct vcpu *v = current; > - const struct mmio_handler *handler = NULL; > + const struct mmio_handler *handler; Why this change? > > - handler = find_mmio_handler(v->domain, info->gpa); > + handler = find_mmio_handler(v, info->gpa); ditto. > if ( !handler ) > return 0; > > @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d, > > vmmio->num_entries++; > > + /* Sort mmio handlers in ascending order based on base address */ > + sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler), > + cmp_mmio_handler, NULL); The indentation looks wrong here. > + > write_unlock(&vmmio->lock); > } > >
Hi Julien, On 07/14/2016 11:46 AM, Julien Grall wrote: > Hi Shanker, > > On 14/07/16 17:18, Shanker Donthineni wrote: >> As the number of I/O handlers increase, the overhead associated with >> linear lookup also increases. The system might have maximum of 144 >> (assuming CONFIG_NR_CPUS=128) mmio handlers. In worst case scenario, >> it would require 144 iterations for finding a matching handler. Now >> it is time for us to change from linear (complexity O(n)) to a binary >> search (complexity O(log n) for reducing mmio handler lookup overhead. >> >> Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> >> --- >> Changes since v3: >> Moved the function bsearch() to common file xen/common/bsearch.c. >> >> Changes since v2: >> Converted mmio lookup code to a critical section. >> Copied the function bsreach() from Linux kernel. >> >> xen/arch/arm/io.c | 42 +++++++++++++++++++++++++----------------- >> 1 file changed, 25 insertions(+), 17 deletions(-) >> >> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c >> index 40330f0..0471ba8 100644 >> --- a/xen/arch/arm/io.c >> +++ b/xen/arch/arm/io.c >> @@ -20,6 +20,8 @@ >> #include <xen/lib.h> >> #include <xen/spinlock.h> >> #include <xen/sched.h> >> +#include <xen/sort.h> >> +#include <xen/bsearch.h> >> #include <asm/current.h> >> #include <asm/mmio.h> >> >> @@ -70,27 +72,29 @@ static int handle_write(const struct mmio_handler >> *handler, struct vcpu *v, >> handler->priv); >> } >> >> -static const struct mmio_handler *find_mmio_handler(struct domain *d, >> - paddr_t gpa) >> +static int cmp_mmio_handler(const void *key, const void *elem) > > Would it be worth to mention in a comment that we don't support > overlapping? > Sure, I'll do. 
>> { >> - const struct mmio_handler *handler; >> - unsigned int i; >> - struct vmmio *vmmio = &d->arch.vmmio; >> + const struct mmio_handler *handler0 = key; >> + const struct mmio_handler *handler1 = elem; >> >> - read_lock(&vmmio->lock); >> + if ( handler0->addr < handler1->addr ) >> + return -1; >> >> - for ( i = 0; i < vmmio->num_entries; i++ ) >> - { >> - handler = &vmmio->handlers[i]; >> + if ( handler0->addr > (handler1->addr + handler1->size) ) >> + return 1; >> >> - if ( (gpa >= handler->addr) && >> - (gpa < (handler->addr + handler->size)) ) >> - break; >> - } >> + return 0; >> +} >> >> - if ( i == vmmio->num_entries ) >> - handler = NULL; >> +static const struct mmio_handler *find_mmio_handler(struct vcpu *v, >> paddr_t gpa) > > Why have you changed the prototype of find_mmio_handler? > As such there is no reason for this change, I'll revert this change in next patchset. >> +{ >> + struct vmmio *vmmio = &v->domain->arch.vmmio; >> + struct mmio_handler key = {.addr = gpa}; > > I know it is not currently the case, but should not we take into > account the size of the access? > I agree with you, we definitely need to consider size of the access for traps/emulation drivers similar to what Linux KVM code does currently. Do you know which emulation drivers are using non aligned accesses? >> + const struct mmio_handler *handler; >> >> + read_lock(&vmmio->lock); >> + handler = bsearch(&key, vmmio->handlers, vmmio->num_entries, >> + sizeof(*handler), cmp_mmio_handler); >> read_unlock(&vmmio->lock); >> >> return handler; >> @@ -99,9 +103,9 @@ static const struct mmio_handler >> *find_mmio_handler(struct domain *d, >> int handle_mmio(mmio_info_t *info) >> { >> struct vcpu *v = current; >> - const struct mmio_handler *handler = NULL; >> + const struct mmio_handler *handler; > > Why this change? > No need to initialize this local variable because the following line always initializes it. I'll revert this change anyway to avoid confusion. 
>> >> - handler = find_mmio_handler(v->domain, info->gpa); >> + handler = find_mmio_handler(v, info->gpa); > > ditto. > >> if ( !handler ) >> return 0; >> >> @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d, >> >> vmmio->num_entries++; >> >> + /* Sort mmio handlers in ascending order based on base address */ >> + sort(vmmio->handlers, vmmio->num_entries, sizeof(struct >> mmio_handler), >> + cmp_mmio_handler, NULL); > > The indentation looks wrong here. > I don't quite understand what you mean. It has 8 spaces, do I need more than 8 spaces or something else? >> + >> write_unlock(&vmmio->lock); >> } >> >> >
Hello Shanker, On 15/07/16 03:35, Shanker Donthineni wrote: > On 07/14/2016 11:46 AM, Julien Grall wrote: [...] >>> +{ >>> + struct vmmio *vmmio = &v->domain->arch.vmmio; >>> + struct mmio_handler key = {.addr = gpa}; >> >> I know it is not currently the case, but should not we take into >> account the size of the access? >> > > I agree with you, we definitely need to consider size of the access for > traps/emulation drivers similar to what Linux KVM code does currently. > Do you know which emulation drivers are using non aligned accesses? Sorry, I don't understand your question. > > >>> + const struct mmio_handler *handler; >>> >>> + read_lock(&vmmio->lock); >>> + handler = bsearch(&key, vmmio->handlers, vmmio->num_entries, >>> + sizeof(*handler), cmp_mmio_handler); >>> read_unlock(&vmmio->lock); >>> >>> return handler; [...] >>> @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d, >>> >>> vmmio->num_entries++; >>> >>> + /* Sort mmio handlers in ascending order based on base address */ >>> + sort(vmmio->handlers, vmmio->num_entries, sizeof(struct >>> mmio_handler), >>> + cmp_mmio_handler, NULL); >> >> The indentation looks wrong here. >> > > I don't quite understand what you mean. It has 8 spaces, do I need more > than 8 spaces or something else? If the list of arguments is on multiples lines, the lines should be aligned to the first arguments. I.e: sort(foo, bar, fish); Your editor should already do it. Regards,
Hi Julien, Are you okay to insert the below two validation checks before inserting mmio handler? void register_mmio_handler(struct domain *d, const struct mmio_handler_ops *ops, paddr_t addr, paddr_t size, void *priv) { struct vmmio *vmmio = &d->arch.vmmio; struct mmio_handler *handler; /* Don't allow overlap regions */ BUG_ON(find_mmio_handler(d, addr)); BUG_ON(find_mmio_handler(d, addr + size - 1)); BUG_ON(vmmio->num_entries >= vmmio->max_num_entries);
diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c index 40330f0..0471ba8 100644 --- a/xen/arch/arm/io.c +++ b/xen/arch/arm/io.c @@ -20,6 +20,8 @@ #include <xen/lib.h> #include <xen/spinlock.h> #include <xen/sched.h> +#include <xen/sort.h> +#include <xen/bsearch.h> #include <asm/current.h> #include <asm/mmio.h> @@ -70,27 +72,29 @@ static int handle_write(const struct mmio_handler *handler, struct vcpu *v, handler->priv); } -static const struct mmio_handler *find_mmio_handler(struct domain *d, - paddr_t gpa) +static int cmp_mmio_handler(const void *key, const void *elem) { - const struct mmio_handler *handler; - unsigned int i; - struct vmmio *vmmio = &d->arch.vmmio; + const struct mmio_handler *handler0 = key; + const struct mmio_handler *handler1 = elem; - read_lock(&vmmio->lock); + if ( handler0->addr < handler1->addr ) + return -1; - for ( i = 0; i < vmmio->num_entries; i++ ) - { - handler = &vmmio->handlers[i]; + if ( handler0->addr > (handler1->addr + handler1->size) ) + return 1; - if ( (gpa >= handler->addr) && - (gpa < (handler->addr + handler->size)) ) - break; - } + return 0; +} - if ( i == vmmio->num_entries ) - handler = NULL; +static const struct mmio_handler *find_mmio_handler(struct vcpu *v, paddr_t gpa) +{ + struct vmmio *vmmio = &v->domain->arch.vmmio; + struct mmio_handler key = {.addr = gpa}; + const struct mmio_handler *handler; + read_lock(&vmmio->lock); + handler = bsearch(&key, vmmio->handlers, vmmio->num_entries, + sizeof(*handler), cmp_mmio_handler); read_unlock(&vmmio->lock); return handler; @@ -99,9 +103,9 @@ static const struct mmio_handler *find_mmio_handler(struct domain *d, int handle_mmio(mmio_info_t *info) { struct vcpu *v = current; - const struct mmio_handler *handler = NULL; + const struct mmio_handler *handler; - handler = find_mmio_handler(v->domain, info->gpa); + handler = find_mmio_handler(v, info->gpa); if ( !handler ) return 0; @@ -131,6 +135,10 @@ void register_mmio_handler(struct domain *d, vmmio->num_entries++; + /* Sort 
mmio handlers in ascending order based on base address */ + sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler), + cmp_mmio_handler, NULL); + write_unlock(&vmmio->lock); }
As the number of I/O handlers increases, the overhead associated with linear lookup also increases. The system might have a maximum of 144 (assuming CONFIG_NR_CPUS=128) mmio handlers. In the worst case scenario, it would require 144 iterations for finding a matching handler. Now it is time for us to change from linear (complexity O(n)) to binary search (complexity O(log n)) for reducing mmio handler lookup overhead. Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org> --- Changes since v3: Moved the function bsearch() to common file xen/common/bsearch.c. Changes since v2: Converted mmio lookup code to a critical section. Copied the function bsearch() from Linux kernel. xen/arch/arm/io.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-)