
[v2,1/4] kvm tools: Add ioeventfd support

Message ID 1306513120-28794-1-git-send-email-levinsasha928@gmail.com (mailing list archive)
State New, archived

Commit Message

Sasha Levin May 27, 2011, 4:18 p.m. UTC
ioeventfd is a mechanism provided by KVM for receiving notifications about
reads and writes to PIO and MMIO areas within the guest.

Such notifications are useful if all we need to know is that a specific
area of memory has been changed, and we don't need a heavyweight exit
to happen.

The implementation uses epoll to scale to a large number of ioeventfds.
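
As a usage sketch (not part of this patch; the callback, port offset and
queue parameter below are made up for illustration), a device that wants a
lightweight notification when the guest writes a given value to a PIO port
fills in a struct ioevent and hands it to ioeventfd__add_event():

	#include <sys/eventfd.h>
	#include <linux/virtio_pci.h>	/* VIRTIO_PCI_QUEUE_NOTIFY */

	#include "kvm/ioeventfd.h"
	#include "kvm/kvm.h"

	/* Hypothetical callback, run from the epoll thread when the event fires */
	static void queue_notify(struct kvm *kvm, void *param)
	{
		/* e.g. kick the I/O thread handling this queue */
	}

	static void register_notify_ioeventfd(struct kvm *kvm, u16 base_port, u32 queue)
	{
		struct ioevent ioevent = (struct ioevent) {
			.io_addr	= base_port + VIRTIO_PCI_QUEUE_NOTIFY,	/* assumed notify port */
			.io_len		= sizeof(u16),
			.fn		= queue_notify,
			.fn_kvm		= kvm,
			.fn_ptr		= NULL,
			.datamatch	= queue,	/* only writes of this queue index fire */
			.fd		= eventfd(0, 0),	/* error handling omitted */
		};

		ioeventfd__add_event(&ioevent);
	}

The epoll thread started by ioeventfd__start() then calls queue_notify()
whenever the guest writes the matching value to that port, without the
kvm tool taking a synchronous exit to user space for the write.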

Benchmarks were run on a separate (non-boot) 1GB virtio-blk device,
formatted as ext4, using bonnie++.

cmd line:
# bonnie++ -d temp/ -c 2 -s 768 -u 0

Before:
Version  1.96       ------Sequential Output------ --Sequential Input- --Random-
Concurrency   2     -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP  /sec %CP
tux            768M   498  99 381127  74 269712  48  2871  99 717109  50 +++++ +++
Latency             18368us   31063us   21281us    3017us    6875us     251ms
Version  1.96       ------Sequential Create------ --------Random Create--------
tux                 -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
              files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP
                 16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
Latency               148us     588us    2792us    1547us    1543us     218us

After:
Version  1.96       ------Sequential Output------ --Sequential Input- --Random-
Concurrency   2     -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP  /sec %CP
tux            768M   499  99 459779  73 350689  54  2997  99 860395  58 +++++ +++
Latency             17194us   14619us   26358us    4055us    7890us   44122us
Version  1.96       ------Sequential Create------ --------Random Create--------
tux                 -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
              files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP
                 16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
Latency               135us     567us    2564us     134us    1500us     231us

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 tools/kvm/Makefile                |    1 +
 tools/kvm/include/kvm/ioeventfd.h |   27 ++++++++
 tools/kvm/ioeventfd.c             |  128 +++++++++++++++++++++++++++++++++++++
 tools/kvm/kvm-run.c               |    4 +
 4 files changed, 160 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/include/kvm/ioeventfd.h
 create mode 100644 tools/kvm/ioeventfd.c

Comments

Ingo Molnar May 27, 2011, 4:52 p.m. UTC | #1
* Sasha Levin <levinsasha928@gmail.com> wrote:

> Benchmarks were run on a separate (non-boot) 1GB virtio-blk device,
> formatted as ext4, using bonnie++.
> 
> cmd line:
> # bonnie++ -d temp/ -c 2 -s 768 -u 0
> 
> Before:
> Version  1.96       ------Sequential Output------ --Sequential Input- --Random-
> Concurrency   2     -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
> Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP  /sec %CP
> tux            768M   498  99 381127  74 269712  48  2871  99 717109  50 +++++ +++
> Latency             18368us   31063us   21281us    3017us    6875us     251ms
> Version  1.96       ------Sequential Create------ --------Random Create--------
> tux                 -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
>               files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP
>                  16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
> Latency               148us     588us    2792us    1547us    1543us     218us
> 
> After:
> Version  1.96       ------Sequential Output------ --Sequential Input- --Random-
> Concurrency   2     -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
> Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP  /sec %CP
> tux            768M   499  99 459779  73 350689  54  2997  99 860395  58 +++++ +++
> Latency             17194us   14619us   26358us    4055us    7890us   44122us
> Version  1.96       ------Sequential Create------ --------Random Create--------
> tux                 -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
>               files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP
>                  16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
> Latency               135us     567us    2564us     134us    1500us     231us

In such cases it would *really* be useful, in addition to dumping 
80-100 raw numbers, to summarize the results and compare them for the 
reader, as I suspect you've done already? Please don't keep it a 
secret and don't force the reader to compare two tables with 80+ raw 
numbers! :-)

Something like:

" In short, block writes are 20% faster, block reads are 19.9% 
  faster, seeks got 16% faster. None of the operations got slower. "

Thanks,

	Ingo
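
For reference, the block-throughput deltas follow directly from the two
tables above: sequential block output improves from 381127 K/sec to
459779 K/sec, a ratio of 459779/381127 = 1.206 or about 20.6%, and
sequential block input from 717109 K/sec to 860395 K/sec, a ratio of
860395/717109 = 1.200 or about 20.0%.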

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 2ebc86c..e7ceb5c 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -48,6 +48,7 @@  OBJS    += irq.o
 OBJS    += rbtree.o
 OBJS    += util/rbtree-interval.o
 OBJS    += virtio/9p.o
+OBJS    += ioeventfd.o
 
 
 FLAGS_BFD=$(CFLAGS) -lbfd
diff --git a/tools/kvm/include/kvm/ioeventfd.h b/tools/kvm/include/kvm/ioeventfd.h
new file mode 100644
index 0000000..df01750
--- /dev/null
+++ b/tools/kvm/include/kvm/ioeventfd.h
@@ -0,0 +1,27 @@ 
+#ifndef KVM__IOEVENTFD_H
+#define KVM__IOEVENTFD_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <sys/eventfd.h>
+
+struct kvm;
+
+struct ioevent {
+	u64			io_addr;
+	u8			io_len;
+	void			(*fn)(struct kvm *kvm, void *ptr);
+	struct kvm		*fn_kvm;
+	void			*fn_ptr;
+	int			fd;
+	u64			datamatch;
+
+	struct list_head	list;
+};
+
+void ioeventfd__init(void);
+void ioeventfd__start(void);
+void ioeventfd__add_event(struct ioevent *ioevent);
+void ioeventfd__del_event(u64 addr, u64 datamatch);
+
+#endif
diff --git a/tools/kvm/ioeventfd.c b/tools/kvm/ioeventfd.c
new file mode 100644
index 0000000..3a240e4
--- /dev/null
+++ b/tools/kvm/ioeventfd.c
@@ -0,0 +1,128 @@ 
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/types.h>
+
+#include "kvm/ioeventfd.h"
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#define IOEVENTFD_MAX_EVENTS	20
+
+static struct	epoll_event events[IOEVENTFD_MAX_EVENTS];
+static int	epoll_fd;
+static LIST_HEAD(used_ioevents);
+
+void ioeventfd__init(void)
+{
+	epoll_fd = epoll_create(IOEVENTFD_MAX_EVENTS);
+	if (epoll_fd < 0)
+		die("Failed creating epoll fd");
+}
+
+void ioeventfd__add_event(struct ioevent *ioevent)
+{
+	struct kvm_ioeventfd kvm_ioevent;
+	struct epoll_event epoll_event;
+	struct ioevent *new_ioevent;
+	int event;
+
+	new_ioevent = malloc(sizeof(*new_ioevent));
+	if (new_ioevent == NULL)
+		die("Failed allocating memory for new ioevent");
+
+	*new_ioevent = *ioevent;
+	event = new_ioevent->fd;
+
+	kvm_ioevent = (struct kvm_ioeventfd) {
+		.addr			= ioevent->io_addr,
+		.len			= ioevent->io_len,
+		.datamatch		= ioevent->datamatch,
+		.fd			= event,
+		.flags			= KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
+	};
+
+	if (ioctl(ioevent->fn_kvm->vm_fd, KVM_IOEVENTFD, &kvm_ioevent) != 0)
+		die("Failed creating new ioeventfd");
+
+	epoll_event = (struct epoll_event) {
+		.events			= EPOLLIN,
+		.data.ptr		= new_ioevent,
+	};
+
+	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, event, &epoll_event) != 0)
+		die("Failed assigning new event to the epoll fd");
+
+	list_add_tail(&new_ioevent->list, &used_ioevents);
+}
+
+void ioeventfd__del_event(u64 addr, u64 datamatch)
+{
+	struct kvm_ioeventfd kvm_ioevent;
+	struct ioevent *ioevent;
+	u8 found = 0;
+
+	list_for_each_entry(ioevent, &used_ioevents, list) {
+		if (ioevent->io_addr == addr) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found == 0 || ioevent == NULL)
+		return;
+
+	kvm_ioevent = (struct kvm_ioeventfd) {
+		.addr			= ioevent->io_addr,
+		.len			= ioevent->io_len,
+		.datamatch		= ioevent->datamatch,
+		.flags			= KVM_IOEVENTFD_FLAG_PIO
+					| KVM_IOEVENTFD_FLAG_DEASSIGN
+					| KVM_IOEVENTFD_FLAG_DATAMATCH,
+	};
+
+	ioctl(ioevent->fn_kvm->vm_fd, KVM_IOEVENTFD, &kvm_ioevent);
+
+	epoll_ctl(epoll_fd, EPOLL_CTL_DEL, ioevent->fd, NULL);
+
+	list_del(&ioevent->list);
+
+	close(ioevent->fd);
+	free(ioevent);
+}
+
+static void *ioeventfd__thread(void *param)
+{
+	for (;;) {
+		int nfds, i;
+
+		nfds = epoll_wait(epoll_fd, events, IOEVENTFD_MAX_EVENTS, -1);
+		for (i = 0; i < nfds; i++) {
+			u64 tmp;
+			struct ioevent *ioevent;
+
+			ioevent = events[i].data.ptr;
+
+			if (read(ioevent->fd, &tmp, sizeof(tmp)) < 0)
+				die("Failed reading event");
+
+			ioevent->fn(ioevent->fn_kvm, ioevent->fn_ptr);
+		}
+	}
+
+	return NULL;
+}
+
+void ioeventfd__start(void)
+{
+	pthread_t thread;
+
+	if (pthread_create(&thread, NULL, ioeventfd__thread, NULL) != 0)
+		die("Failed starting ioeventfd thread");
+}
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index f384ddd..48b8e70 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -29,6 +29,7 @@ 
 #include <kvm/symbol.h>
 #include <kvm/virtio-9p.h>
 #include <kvm/vesa.h>
+#include <kvm/ioeventfd.h>
 
 /* header files for gitish interface  */
 #include <kvm/kvm-run.h>
@@ -505,6 +506,8 @@  int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 
 	kvm = kvm__init(kvm_dev, ram_size);
 
+	ioeventfd__init();
+
 	max_cpus = kvm__max_cpus(kvm);
 
 	if (nrcpus > max_cpus) {
@@ -612,6 +615,7 @@  int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 		vesa__init(kvm);
 
 	thread_pool__init(nr_online_cpus);
+	ioeventfd__start();
 
 	for (i = 0; i < nrcpus; i++) {
 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)