@@ -200,6 +200,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call = NULL;
vq->log_ctx = NULL;
vq->memory = NULL;
+ vq->cached_reg = 0;
}
static int vhost_worker(void *data)
@@ -649,6 +650,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
for (i = 0; i < d->nvqs; ++i) {
mutex_lock(&d->vqs[i]->mutex);
d->vqs[i]->memory = newmem;
+ d->vqs[i]->cached_reg = 0;
mutex_unlock(&d->vqs[i]->mutex);
}
kvfree(oldmem);
@@ -936,11 +938,17 @@ done:
EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
- __u64 addr, __u32 len)
+ __u64 addr, __u32 len,
+ int *cached_reg)
{
const struct vhost_memory_region *reg;
int start = 0, end = mem->nregions;
+ reg = mem->regions + *cached_reg;
+ if (likely(addr >= reg->guest_phys_addr &&
+ reg->guest_phys_addr + reg->memory_size > addr))
+ return reg;
+
while (start < end) {
int slot = start + (end - start) / 2;
reg = mem->regions + slot;
@@ -952,8 +960,10 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
reg = mem->regions + start;
if (addr >= reg->guest_phys_addr &&
- reg->guest_phys_addr + reg->memory_size > addr)
+ reg->guest_phys_addr + reg->memory_size > addr) {
+ *cached_reg = start;
return reg;
+ }
return NULL;
}
@@ -1107,7 +1117,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
ret = -ENOBUFS;
break;
}
- reg = find_region(mem, addr, len);
+ reg = find_region(mem, addr, len, &vq->cached_reg);
if (unlikely(!reg)) {
ret = -EFAULT;
break;
@@ -106,6 +106,7 @@ struct vhost_virtqueue {
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
+ int cached_reg;
};
struct vhost_dev {
that brings down translate_desc() cost to around 210ns
if accessed descriptors are from the same memory region.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
That is the access pattern the netperf/iperf workloads exhibited during testing.
---
 drivers/vhost/vhost.c | 16 +++++++++++++---
 drivers/vhost/vhost.h |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)