diff mbox

kvm tools: Add ivshmem device

Message ID 1314620700-3852-1-git-send-email-levinsasha928@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sasha Levin Aug. 29, 2011, 12:25 p.m. UTC
The patch adds an ivshmem device which can be used to share memory between
guests on the same host.

This implementation is lacking inter-guest communication which should be
implemented later once information regarding the client-server protocol is
gathered, though infrastructure used to add and remove clients already exists
in the patch (but isn't used anywhere).

Patch is based on David Evensky's shmem device.

Signed-off-by: David Evensky <evensky@sandia.gov>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 tools/kvm/Makefile                     |    1 +
 tools/kvm/builtin-run.c                |  134 ++++++++++++++++
 tools/kvm/hw/pci-shmem.c               |  266 ++++++++++++++++++++++++++++++++
 tools/kvm/include/kvm/pci-shmem.h      |   28 ++++
 tools/kvm/include/kvm/pci.h            |    3 +-
 tools/kvm/include/kvm/virtio-pci-dev.h |    3 +
 tools/kvm/pci.c                        |    5 +-
 tools/kvm/virtio/pci.c                 |    4 +-
 8 files changed, 438 insertions(+), 6 deletions(-)
 create mode 100644 tools/kvm/hw/pci-shmem.c
 create mode 100644 tools/kvm/include/kvm/pci-shmem.h
diff mbox

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 25cbd7e..efa032d 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -81,6 +81,7 @@  OBJS	+= virtio/9p.o
 OBJS	+= virtio/9p-pdu.o
 OBJS	+= hw/vesa.o
 OBJS	+= hw/i8042.o
+OBJS	+= hw/pci-shmem.o
 
 FLAGS_BFD := $(CFLAGS) -lbfd
 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 38612b6..b9efde2 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -28,6 +28,7 @@ 
 #include "kvm/sdl.h"
 #include "kvm/vnc.h"
 #include "kvm/guest_compat.h"
+#include "kvm/pci-shmem.h"
 
 #include <linux/types.h>
 
@@ -52,6 +53,8 @@ 
 #define DEFAULT_SCRIPT		"none"
 
 #define MB_SHIFT		(20)
+#define KB_SHIFT		(10)
+#define GB_SHIFT		(30)
 #define MIN_RAM_SIZE_MB		(64ULL)
 #define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
 
@@ -151,6 +154,131 @@  static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, i
 	return 0;
 }
 
+/*
+ * Option callback for --shmem.  Parses
+ *
+ *	[pci:]<addr>:<size>[:handle=<handle>][:create]
+ *
+ * and hands the resulting shmem_info to pci_shmem__register_mem().
+ *
+ * - <addr> and <size> are decimal, or hexadecimal when they start with "0x".
+ *   An empty field selects SHMEM_DEFAULT_ADDR / SHMEM_DEFAULT_SIZE.
+ * - <size> may carry a K/M/G suffix (optionally followed by B), base 2.
+ * - A missing or empty handle selects SHMEM_DEFAULT_HANDLE.
+ *
+ * Every keyword is matched only at the current parse position, so text in a
+ * later field (e.g. a hex size, or a handle containing "0x" or "mem:") can
+ * no longer change how an earlier field is interpreted.
+ *
+ * Malformed input and allocation failures end the program through die().
+ * Always returns 0 otherwise.
+ */
+static int shmem_parser(const struct option *opt, const char *arg, int unset)
+{
+	const uint64_t default_size = SHMEM_DEFAULT_SIZE;
+	const uint64_t default_phys_addr = SHMEM_DEFAULT_ADDR;
+	const char *default_handle = SHMEM_DEFAULT_HANDLE;
+	const int skip_pci = strlen("pci:");
+	const int skip_handle = strlen("handle=");
+	struct shmem_info *si;
+	uint64_t phys_addr;
+	uint64_t size;
+	char *handle = NULL;
+	int create = 0;
+	const char *p = arg;
+	char *next;
+	int base;
+	int shift = 0;
+	int verbose = 0;	/* flip to 1 for parser debugging output */
+
+	if (verbose)
+		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
+
+	si = malloc(sizeof(*si));
+	if (si == NULL)
+		die("shmem: out of memory\n");
+
+	/* parse out optional addr family (must be the very first token) */
+	if (strcasestr(p, "pci:") == p)
+		p += skip_pci;
+	else if (strcasestr(p, "mem:") == p)
+		die("I can't add to E820 map yet.\n");
+
+	/* parse out physical addr */
+	base = (strcasestr(p, "0x") == p) ? 16 : 10;
+	phys_addr = strtoull(p, &next, base);
+	if (next == p) {
+		pr_info("shmem: no physical addr specified, using default.");
+		phys_addr = default_phys_addr;
+	}
+	if (*next != ':' && *next != '\0')
+		die("shmem: unexpected chars after phys addr.\n");
+	p = (*next == '\0') ? next : next + 1;
+
+	/* parse out size */
+	base = (strcasestr(p, "0x") == p) ? 16 : 10;
+	size = strtoull(p, &next, base);
+	if (next == p) {
+		pr_info("shmem: no size specified, using default.");
+		size = default_size;
+	}
+
+	/* look for [KMGkmg][Bb]*  uses base 2. */
+	switch (*next) {
+	case 'K':
+	case 'k':
+		shift = KB_SHIFT;
+		break;
+	case 'M':
+	case 'm':
+		shift = MB_SHIFT;
+		break;
+	case 'G':
+	case 'g':
+		shift = GB_SHIFT;
+		break;
+	default:
+		break;
+	}
+	if (shift) {
+		size <<= shift;
+		next++;
+		if (*next == 'B' || *next == 'b')
+			next++;
+	}
+	if (*next != ':' && *next != '\0') {
+		die("shmem: unexpected chars after phys size. <%c><%c>\n",
+		    *next, *p);
+	}
+	p = (*next == '\0') ? next : next + 1;
+
+	/* parse out optional shmem handle */
+	next = strcasestr(p, "handle=");
+	if (*p && next) {
+		if (p != next)
+			die("unexpected chars before handle\n");
+		p += skip_handle;
+		next = strchrnul(p, ':');
+		if (next != p) {
+			size_t n = next - p;
+
+			handle = malloc(n + 1);
+			if (handle == NULL)
+				die("shmem: out of memory\n");
+			memcpy(handle, p, n);
+			handle[n] = '\0';
+		}
+		p = (*next == '\0') ? next : next + 1;
+	}
+
+	/* parse optional create flag to see if we should create shm seg. */
+	if (strcasestr(p, "create") == p) {
+		create = 1;
+		p += strlen("create");
+	}
+	if (*p != '\0')
+		die("shmem: unexpected trailing chars\n");
+
+	if (handle == NULL) {
+		handle = malloc(strlen(default_handle) + 1);
+		if (handle == NULL)
+			die("shmem: out of memory\n");
+		strcpy(handle, default_handle);
+	}
+	if (verbose) {
+		pr_info("shmem: phys_addr = %llx", (unsigned long long)phys_addr);
+		pr_info("shmem: size      = %llx", (unsigned long long)size);
+		pr_info("shmem: handle    = %s", handle);
+		pr_info("shmem: create    = %d", create);
+	}
+
+	si->phys_addr = phys_addr;
+	si->size = size;
+	si->handle = handle;
+	si->create = create;
+	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
+	return 0;
+}
 
 static const struct option options[] = {
 	OPT_GROUP("Basic options:"),
@@ -158,6 +286,10 @@  static const struct option options[] = {
 			"A name for the guest"),
 	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
 	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
+	OPT_CALLBACK('\0', "shmem", NULL,
+		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
+		     "Share host shmem with guest via pci device",
+		     shmem_parser),
 	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
 	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
 	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
@@ -695,6 +827,8 @@  int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 
 	kbd__init(kvm);
 
+	pci_shmem__init(kvm);
+
 	if (vnc || sdl)
 		fb = vesa__init(kvm);
 
diff --git a/tools/kvm/hw/pci-shmem.c b/tools/kvm/hw/pci-shmem.c
new file mode 100644
index 0000000..295f5cd
--- /dev/null
+++ b/tools/kvm/hw/pci-shmem.c
@@ -0,0 +1,266 @@ 
+#include "kvm/pci-shmem.h"
+#include "kvm/virtio-pci-dev.h"
+#include "kvm/irq.h"
+#include "kvm/kvm.h"
+#include "kvm/pci.h"
+#include "kvm/util.h"
+#include "kvm/ioport.h"
+#include "kvm/ioeventfd.h"
+
+#include <linux/kvm.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+static struct pci_device_header pci_shmem_pci_device = {	/* PCI config header handed to pci__register() by pci_shmem__init() */
+	.vendor_id	= PCI_VENDOR_ID_REDHAT_QUMRANET,
+	.device_id	= 0x1110,
+	.header_type	= PCI_HEADER_TYPE_NORMAL,
+	.class		= 0xFF0000,	/* misc pci device */
+	.status		= PCI_STATUS_CAP_LIST,
+	.capabilities	= (void *)&pci_shmem_pci_device.msix - (void *)&pci_shmem_pci_device,	/* byte offset of the msix member inside this header */
+	.msix.cap	= PCI_CAP_ID_MSIX,
+	.msix.ctrl	= 1,	/* NOTE(review): presumably the N-1 encoded table size, i.e. 2 entries to match msix_table[2] - confirm */
+	.msix.table_offset = 1,		/* Use BAR 1 */
+	.msix.pba_offset = 0x1001,	/* Use BAR 1; 0x1000 is where callback_mmio_msix() switches from table to PBA */
+};
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {	/* values are offsets from the I/O port base kept in the ivshmem_registers variable */
+	INTRMASK = 0,		/* currently ignored by both I/O handlers */
+	INTRSTATUS = 4,		/* currently ignored by both I/O handlers */
+	IVPOSITION = 8,		/* reads return local_id */
+	DOORBELL = 12,		/* port that client ioeventfds are attached to */
+};
+
+static struct shmem_info *shmem_region;	/* region set by pci_shmem__register_mem(); NULL until then */
+static u16 ivshmem_registers;	/* I/O port base returned by ioport__register() in pci_shmem__init() */
+static int local_fd;	/* eventfd cached by pci_shmem__get_local_irqfd(); 0 means "not created yet" */
+static u32 local_id;	/* value returned for IVPOSITION reads; not assigned anywhere in this file */
+static u64 msix_block;	/* base of the MMIO block obtained from pci_get_io_space_block() */
+static u64 msix_pba;	/* backing storage used for accesses at msix_block + 0x1000 and above */
+static struct msix_table msix_table[2];	/* backing storage used for accesses below msix_block + 0x1000 */
+
+/*
+ * Record the shared memory region the device should expose.
+ *
+ * Takes ownership of @si and of si->handle.  Only one region is supported:
+ * the first registration is kept; any later one is discarded with a warning
+ * and freed here, including its handle string, which would otherwise leak.
+ *
+ * Always returns 0.
+ */
+int pci_shmem__register_mem(struct shmem_info *si)
+{
+	if (shmem_region == NULL) {
+		shmem_region = si;
+		return 0;
+	}
+
+	pr_warning("only single shmem currently avail. ignoring.\n");
+	if (si != NULL)
+		free(si->handle);
+	free(si);
+
+	return 0;
+}
+
+/*
+ * Port-read handler for the ivshmem register block.
+ *
+ * Only IVPOSITION produces data (the local client id); reads of any other
+ * register leave @data untouched.  Always reports the access as handled.
+ */
+static bool shmem_pci__io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size)
+{
+	const u16 reg = port - ivshmem_registers;
+
+	if (reg == IVPOSITION)
+		ioport__write32(data, local_id);
+
+	return true;
+}
+
+/*
+ * Port-write handler for the ivshmem register block.
+ *
+ * No register write has any effect yet; the switch only enumerates the
+ * known registers as a placeholder.  Always reports the access as handled.
+ */
+static bool shmem_pci__io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size)
+{
+	const u16 reg = port - ivshmem_registers;
+
+	switch (reg) {
+	case INTRMASK:
+	case INTRSTATUS:
+	case IVPOSITION:
+	case DOORBELL:
+	default:
+		/* writes are accepted and dropped */
+		break;
+	}
+
+	return true;
+}
+
+static struct ioport_operations shmem_pci__io_ops = {	/* handlers passed to ioport__register() in pci_shmem__init() */
+	.io_in	= shmem_pci__io_in,
+	.io_out	= shmem_pci__io_out,
+};
+
+/*
+ * MMIO handler for the MSI-X block registered at msix_block.
+ *
+ * Offsets below 0x1000 address msix_table; offsets from 0x1000 upward
+ * address msix_pba, rebased so that 0x1000 maps to the start of msix_pba.
+ *
+ * The registered window is far larger than either backing object, so every
+ * access is bounds-checked: out-of-range writes are dropped and out-of-range
+ * reads return zeroes instead of touching memory next to the static
+ * objects.
+ */
+static void callback_mmio_msix(u64 addr, u8 *data, u32 len, u8 is_write, void *ptr)
+{
+	u64 offset = addr - msix_block;
+	size_t avail;
+	u8 *mem;
+
+	if (offset < 0x1000) {
+		mem = (u8 *)&msix_table;
+		avail = sizeof(msix_table);
+	} else {
+		offset -= 0x1000;
+		mem = (u8 *)&msix_pba;
+		avail = sizeof(msix_pba);
+	}
+
+	if (offset >= avail || len > avail - offset) {
+		if (!is_write)
+			memset(data, 0, len);
+		return;
+	}
+
+	if (is_write)
+		memcpy(mem + offset, data, len);
+	else
+		memcpy(data, mem + offset, len);
+}
+
+/*
+ * Return an irqfd which can be used by other guests to signal this guest
+ * whenever they need to poke it
+ */
+int pci_shmem__get_local_irqfd(struct kvm *kvm)
+{
+	struct kvm_irqfd irqfd;
+	int fd, gsi, r;
+
+	/* Already set up: hand back the cached descriptor. */
+	if (local_fd != 0)
+		return local_fd;
+
+	fd = eventfd(0, 0);
+	if (fd < 0)
+		return fd;
+
+	/* Route through MSI-X vector 0 when enabled, else the INTx line. */
+	if (pci_shmem_pci_device.msix.ctrl & PCI_MSIX_FLAGS_ENABLE) {
+		gsi = irq__add_msix_route(kvm,
+			  msix_table[0].low,
+			  msix_table[0].high,
+			  msix_table[0].data);
+	} else {
+		gsi = pci_shmem_pci_device.irq_line;
+	}
+
+	/*
+	 * A negative GSI cannot be valid.
+	 * NOTE(review): assumes irq__add_msix_route() reports failure with a
+	 * negative return value - confirm.
+	 */
+	if (gsi < 0) {
+		close(fd);
+		return gsi;
+	}
+
+	irqfd = (struct kvm_irqfd) {
+		.fd = fd,
+		.gsi = gsi,
+	};
+
+	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irqfd);
+	if (r < 0) {
+		/* Do not leak the eventfd when KVM rejects it. */
+		close(fd);
+		return r;
+	}
+
+	local_fd = fd;
+
+	return local_fd;
+}
+
+/*
+ * Connect a new client to ivshmem by adding the appropriate datamatch
+ * to the DOORBELL
+ */
+int pci_shmem__add_client(struct kvm *kvm, u32 id, int fd)
+{
+	/*
+	 * Fire @fd whenever the guest writes the 32-bit value @id to the
+	 * DOORBELL port.
+	 */
+	struct kvm_ioeventfd doorbell = {
+		.flags		= KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
+		.fd		= fd,
+		.datamatch	= id,
+		.len		= sizeof(u32),
+		.addr		= ivshmem_registers + DOORBELL,
+	};
+
+	/* Result of the KVM_IOEVENTFD ioctl is returned unchanged. */
+	return ioctl(kvm->vm_fd, KVM_IOEVENTFD, &doorbell);
+}
+
+/*
+ * Remove a client connected to ivshmem by removing the appropriate datamatch
+ * from the DOORBELL
+ */
+int pci_shmem__remove_client(struct kvm *kvm, u32 id)
+{
+	/*
+	 * Same match as pci_shmem__add_client(), plus the DEASSIGN flag.
+	 * NOTE(review): no .fd is supplied here - confirm whether KVM needs
+	 * the original eventfd to find the registration being removed.
+	 */
+	struct kvm_ioeventfd doorbell = {
+		.flags		= KVM_IOEVENTFD_FLAG_PIO
+				| KVM_IOEVENTFD_FLAG_DATAMATCH
+				| KVM_IOEVENTFD_FLAG_DEASSIGN,
+		.datamatch	= id,
+		.len		= sizeof(u32),
+		.addr		= ivshmem_registers + DOORBELL,
+	};
+
+	/* Result of the KVM_IOEVENTFD ioctl is returned unchanged. */
+	return ioctl(kvm->vm_fd, KVM_IOEVENTFD, &doorbell);
+}
+
+/*
+ * Open (and optionally create) the POSIX shared memory object @key and map
+ * @len bytes of it read/write and shared.
+ *
+ * When @creating is non-zero the object is created if missing and resized
+ * to @len.  A failed resize is treated as fatal for this call, because
+ * touching pages beyond the end of an undersized object would fault.
+ *
+ * Returns the mapping, or NULL on any failure.  mmap() reports failure as
+ * MAP_FAILED, which is translated to NULL so callers can keep testing
+ * against NULL.  The descriptor is closed before returning; the mapping
+ * stays valid without it.
+ */
+static void *setup_shmem(const char *key, size_t len, int creating)
+{
+	int flag = O_RDWR;
+	void *mem;
+	int fd;
+
+	if (creating)
+		flag |= O_CREAT;
+
+	fd = shm_open(key, flag, S_IRUSR | S_IWUSR);
+	if (fd < 0) {
+		pr_warning("Failed to open shared memory file %s\n", key);
+		return NULL;
+	}
+
+	if (creating && ftruncate(fd, (off_t) len) < 0) {
+		pr_warning("Can't ftruncate(fd,%zu)\n", len);
+		close(fd);
+		return NULL;
+	}
+
+	mem = mmap(NULL, len,
+		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, fd, 0);
+	close(fd);
+
+	if (mem == MAP_FAILED) {
+		pr_warning("Failed to mmap shared memory file");
+		return NULL;
+	}
+
+	return mem;
+}
+
+/*
+ * Bring up the ivshmem PCI device for the region registered through
+ * pci_shmem__register_mem().
+ *
+ * The shared memory is opened and mapped before anything is registered, so
+ * a failure there no longer leaves a PCI device whose BAR 2 has no backing
+ * memory.
+ *
+ * Returns 1 when the device was set up, 0 when no region was registered or
+ * a setup step failed.
+ */
+int pci_shmem__init(struct kvm *kvm)
+{
+	u8 dev, line, pin;
+	char *mem;
+
+	if (shmem_region == NULL) {
+		pr_warning("pci_shmem_init: memory region not registered\n");
+		return 0;
+	}
+
+	/* Open shared memory first; bail out before touching PCI state */
+	mem = setup_shmem(shmem_region->handle, shmem_region->size,
+				shmem_region->create);
+	if (mem == NULL)
+		return 0;
+
+	/* Register good old INTx */
+	if (irq__register_device(PCI_DEVICE_ID_PCI_SHMEM, &dev, &pin, &line) < 0) {
+		munmap(mem, shmem_region->size);
+		return 0;
+	}
+
+	pci_shmem_pci_device.irq_pin = pin;
+	pci_shmem_pci_device.irq_line = line;
+
+	/* Register the I/O ports for the registers and MMIO space for MSI-X */
+	ivshmem_registers = ioport__register(IOPORT_EMPTY, &shmem_pci__io_ops, IOPORT_SIZE, NULL);
+	msix_block = pci_get_io_space_block(0x1010);
+	kvm__register_mmio(kvm, msix_block, 0x1010, callback_mmio_msix, NULL);
+
+	/*
+	 * This registers 3 BARs:
+	 *
+	 * 0 - ivshmem registers
+	 * 1 - MSI-X MMIO space
+	 * 2 - Shared memory block
+	 *
+	 * NOTE(review): bar_size[0] is set to the shared memory size although
+	 * BAR 0 only covers the register ports - confirm this is intended.
+	 */
+	pci_shmem_pci_device.bar[0] = ivshmem_registers | PCI_BASE_ADDRESS_SPACE_IO;
+	pci_shmem_pci_device.bar_size[0] = shmem_region->size;
+	pci_shmem_pci_device.bar[1] = msix_block | PCI_BASE_ADDRESS_SPACE_MEMORY;
+	pci_shmem_pci_device.bar_size[1] = 0x1010;
+	pci_shmem_pci_device.bar[2] = shmem_region->phys_addr | PCI_BASE_ADDRESS_SPACE_MEMORY;
+	pci_shmem_pci_device.bar_size[2] = shmem_region->size;
+
+	pci__register(&pci_shmem_pci_device, dev);
+
+	/* Plug the mapping into the guest at the requested physical address */
+	kvm__register_mem(kvm, shmem_region->phys_addr, shmem_region->size,
+			  mem);
+	return 1;
+}
diff --git a/tools/kvm/include/kvm/pci-shmem.h b/tools/kvm/include/kvm/pci-shmem.h
new file mode 100644
index 0000000..599ab37
--- /dev/null
+++ b/tools/kvm/include/kvm/pci-shmem.h
@@ -0,0 +1,28 @@ 
+#ifndef KVM__PCI_SHMEM_H
+#define KVM__PCI_SHMEM_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#define SHMEM_DEFAULT_SIZE (16 << MB_SHIFT)	/* used when --shmem gives no size; MB_SHIFT is not defined in this header, the includer must provide it */
+#define SHMEM_DEFAULT_ADDR (0xc8000000)	/* used when --shmem gives no address */
+#define SHMEM_DEFAULT_HANDLE "/kvm_shmem"	/* used when --shmem gives no handle */
+
+struct kvm;
+struct shmem_info;
+
+struct shmem_info {	/* one shared memory region requested via --shmem */
+	u64 phys_addr;	/* guest physical address; placed in BAR 2 and passed to kvm__register_mem() */
+	u64 size;	/* region length in bytes; also the length that gets mmap()ed */
+	char *handle;	/* name passed to shm_open(); allocated by shmem_parser() */
+	int create;	/* non-zero: create the object (O_CREAT) and ftruncate() it to size */
+};
+
+int pci_shmem__init(struct kvm *self);	/* returns 1 when the device was set up, 0 otherwise */
+int pci_shmem__register_mem(struct shmem_info *si);	/* keeps the first si registered, frees later ones; always returns 0 */
+
+int pci_shmem__get_local_irqfd(struct kvm *kvm);	/* eventfd bound to this guest's interrupt, or a negative value on failure */
+int pci_shmem__add_client(struct kvm *kvm, u32 id, int fd);	/* signal fd on DOORBELL writes equal to id; returns the ioctl() result */
+int pci_shmem__remove_client(struct kvm *kvm, u32 id);	/* deassign the DOORBELL match for id; returns the ioctl() result */
+
+#endif
diff --git a/tools/kvm/include/kvm/pci.h b/tools/kvm/include/kvm/pci.h
index e74d3ec..5ee8005 100644
--- a/tools/kvm/include/kvm/pci.h
+++ b/tools/kvm/include/kvm/pci.h
@@ -13,6 +13,7 @@ 
 #define PCI_CONFIG_ADDRESS	0xcf8
 #define PCI_CONFIG_DATA		0xcfc
 #define PCI_CONFIG_BUS_FORWARD	0xcfa
+#define PCI_IO_SIZE		0x100
 
 struct pci_config_address {
 	unsigned	zeros		: 2;		/* 1  .. 0  */
@@ -69,6 +70,6 @@  struct pci_device_header {
 
 void pci__init(void);
 void pci__register(struct pci_device_header *dev, u8 dev_num);
-u32 pci_get_io_space_block(void);
+u32 pci_get_io_space_block(u32 size);
 
 #endif /* KVM__PCI_H */
diff --git a/tools/kvm/include/kvm/virtio-pci-dev.h b/tools/kvm/include/kvm/virtio-pci-dev.h
index 4eee831..bfcb076 100644
--- a/tools/kvm/include/kvm/virtio-pci-dev.h
+++ b/tools/kvm/include/kvm/virtio-pci-dev.h
@@ -15,10 +15,13 @@ 
 #define PCI_DEVICE_ID_VIRTIO_BLN		0x1005
 #define PCI_DEVICE_ID_VIRTIO_P9			0x1009
 #define PCI_DEVICE_ID_VESA			0x2000
+#define PCI_DEVICE_ID_PCI_SHMEM			0x0001
 
 #define PCI_VENDOR_ID_REDHAT_QUMRANET		0x1af4
+#define PCI_VENDOR_ID_PCI_SHMEM			0x0001
 #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET	0x1af4
 
 #define PCI_SUBSYSTEM_ID_VESA			0x0004
+#define PCI_SUBSYSTEM_ID_PCI_SHMEM		0x0001
 
 #endif /* VIRTIO_PCI_DEV_H_ */
diff --git a/tools/kvm/pci.c b/tools/kvm/pci.c
index a046c19..d1afc05 100644
--- a/tools/kvm/pci.c
+++ b/tools/kvm/pci.c
@@ -6,7 +6,6 @@ 
 #include <assert.h>
 
 #define PCI_MAX_DEVICES			256
-#define PCI_IO_SIZE			0x100
 #define PCI_BAR_OFFSET(b)		(offsetof(struct pci_device_header, bar[b]))
 
 static struct pci_device_header		*pci_devices[PCI_MAX_DEVICES];
@@ -16,10 +15,10 @@  static struct pci_config_address	pci_config_address;
 /* This is within our PCI gap - in an unused area */
 static u32 io_space_blocks		= KVM_32BIT_GAP_START + 0x1000000;
 
-u32 pci_get_io_space_block(void)
+u32 pci_get_io_space_block(u32 size)
 {
 	u32 block = io_space_blocks;
-	io_space_blocks += PCI_IO_SIZE;
+	io_space_blocks += size;
 
 	return block;
 }
diff --git a/tools/kvm/virtio/pci.c b/tools/kvm/virtio/pci.c
index 9e2a1ed..2745b25 100644
--- a/tools/kvm/virtio/pci.c
+++ b/tools/kvm/virtio/pci.c
@@ -266,8 +266,8 @@  int virtio_pci__init(struct kvm *kvm, struct virtio_pci *vpci, void *dev,
 	u8 pin, line, ndev;
 
 	vpci->dev = dev;
-	vpci->msix_io_block = pci_get_io_space_block();
-	vpci->msix_pba_block = pci_get_io_space_block();
+	vpci->msix_io_block = pci_get_io_space_block(PCI_IO_SIZE);
+	vpci->msix_pba_block = pci_get_io_space_block(PCI_IO_SIZE);
 
 	vpci->base_addr = ioport__register(IOPORT_EMPTY, &virtio_pci__io_ops, IOPORT_SIZE, vpci);
 	kvm__register_mmio(kvm, vpci->msix_io_block, 0x100, callback_mmio_table, vpci);