Message ID | 64276bb78fce32a6ddf4356bf29632af1ef2c785.1571905346.git.jag.raman@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Initial support of multi-process qemu | expand |
On Thu, Oct 24, 2019 at 05:08:54AM -0400, Jagannathan Raman wrote: > diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c > new file mode 100644 > index 0000000..baba4da > --- /dev/null > +++ b/hw/proxy/qemu-proxy.c > @@ -0,0 +1,247 @@ > +/* > + * Copyright 2019, Oracle and/or its affiliates. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <sys/types.h> > +#include <sys/socket.h> > +#include <unistd.h> > +#include <assert.h> > +#include <string.h> > +#include "qemu/osdep.h" Most of these includes are not necessary. Please see "Include directives" in CODING_STYLE.rst. "qemu/osdep.h" is always first (even before system headers) and it already includes the common system headers. 
> +int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp) > +{ > + char *args[3]; > + pid_t rpid; > + int fd[2] = {-1, -1}; > + Error *local_error = NULL; > + > + if (pdev->managed) { > + /* Child is forked by external program (such as libvirt). */ > + return -1; > + } > + > + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd)) { > + error_setg(errp, "Unable to create unix socket."); > + return -1; > + } > + /* TODO: Restrict the forked process' permissions and capabilities. */ > + rpid = qemu_fork(&local_error); > + > + if (rpid == -1) { > + error_setg(errp, "Unable to spawn emulation program."); > + close(fd[0]); > + close(fd[1]); > + return -1; > + } > + > + if (rpid == 0) { > + close(fd[0]); > + > + args[0] = g_strdup(command); > + args[1] = g_strdup_printf("%d", fd[1]); > + args[2] = NULL; > + execvp(args[0], (char *const *)args); execv(3) is safer because it doesn't search PATH. Unless searching PATH is really needed I would use that instead just in case this is ever deployed in an environment where an attacker controls a directory in PATH or is able to set PATH. > +static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l, > + unsigned int op) > +{ > + MPQemuMsg msg; > + struct conf_data_msg conf_data; > + int wait; > + > + memset(&msg, 0, sizeof(MPQemuMsg)); > + conf_data.addr = addr; > + conf_data.val = (op == CONF_WRITE) ? *val : 0; > + conf_data.l = l; > + > + msg.data2 = (uint8_t *)malloc(sizeof(conf_data)); > + if (!msg.data2) { > + return -ENOMEM; > + } > + > + memcpy(msg.data2, (const uint8_t *)&conf_data, sizeof(conf_data)); > + msg.size = sizeof(conf_data); Why malloc msg.data2 instead of simply pointing it at conf_data? > + msg.cmd = op; > + msg.bytestream = 1; > + > + if (op == CONF_WRITE) { > + msg.num_fds = 0; > + } else { > + wait = GET_REMOTE_WAIT; It seems slow to create an fd and pass it for each 32-bit PCI Configuration Space read operation. 
This doesn't need to be changed right now, but eventually the protocol should handle this more efficiently. > + msg.num_fds = 1; > + msg.fds[0] = wait; > + } > + > + mpqemu_msg_send(dev->mpqemu_link, &msg, dev->mpqemu_link->com); > + > + if (op == CONF_READ) { > + *val = (uint32_t)wait_for_remote(wait); > + PUT_REMOTE_WAIT(wait); > + } Waiting for the eventfd blocks the event loop. This means timers and other fds won't be serviced when the remote is slow to respond. Please avoid blocking operations in event loop threads. > + > + free(msg.data2); > + > + return 0; > +} > + > +static uint32_t pci_proxy_read_config(PCIDevice *d, uint32_t addr, int len) > +{ > + uint32_t val; > + > + (void)pci_default_read_config(d, addr, len); What is the purpose of this call? > +static const TypeInfo pci_proxy_dev_type_info = { > + .name = TYPE_PCI_PROXY_DEV, > + .parent = TYPE_PCI_DEVICE, > + .instance_size = sizeof(PCIProxyDev), > + .abstract = true, > + .class_size = sizeof(PCIProxyDevClass), > + .class_init = pci_proxy_dev_class_init, > + .interfaces = (InterfaceInfo[]) { > + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, > + { }, > + }, > +}; It would be nice for -device pci-proxy-dev to work as a placeholder for *any* PCI bus device without the need to define concrete subclasses. Could the protocol exchange the PCI device configuration (similar to VFIO and muser ioctls) so that this single object can act as any remote PCI device? > diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h > new file mode 100644 > index 0000000..3648a77 > --- /dev/null > +++ b/include/hw/proxy/qemu-proxy.h > @@ -0,0 +1,81 @@ > +/* > + * Copyright 2019, Oracle and/or its affiliates. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#ifndef QEMU_PROXY_H > +#define QEMU_PROXY_H > + > +#include "io/mpqemu-link.h" > + > +#define TYPE_PCI_PROXY_DEV "pci-proxy-dev" > + > +#define PCI_PROXY_DEV(obj) \ > + OBJECT_CHECK(PCIProxyDev, (obj), TYPE_PCI_PROXY_DEV) > + > +#define PCI_PROXY_DEV_CLASS(klass) \ > + OBJECT_CLASS_CHECK(PCIProxyDevClass, (klass), TYPE_PCI_PROXY_DEV) > + > +#define PCI_PROXY_DEV_GET_CLASS(obj) \ > + OBJECT_GET_CLASS(PCIProxyDevClass, (obj), TYPE_PCI_PROXY_DEV) > + > +typedef struct PCIProxyDev { > + PCIDevice parent_dev; > + > + int n_mr_sections; > + MemoryRegionSection *mr_sections; Unused. > + > + MPQemuLinkState *mpqemu_link; > + > + EventNotifier intr; > + EventNotifier resample; Unused. > + > + pid_t remote_pid; > + int rsocket; > + int socket; What is the difference between rsocket and socket? Why is socket only read in this patch and never written? 
> + > + char *rid; Can remote_pid and rid be unified. They store the same value in different representations. > + > + bool managed; > + char *dev_id; dev_id is unused. > + > + QLIST_ENTRY(PCIProxyDev) next; Unused. > + > + void (*set_proxy_sock) (PCIDevice *dev, int socket); > + int (*get_proxy_sock) (PCIDevice *dev); > + > + void (*set_remote_opts) (PCIDevice *dev, QDict *qdict, unsigned int cmd); > + void (*proxy_ready) (PCIDevice *dev); Unused. > + void (*init_proxy) (PCIDevice *pdev, char *command, Error **errp); Why are these function pointers not in PCIProxyDevClass? > + > +} PCIProxyDev; > + > +typedef struct PCIProxyDevClass { > + PCIDeviceClass parent_class; > + > + void (*realize)(PCIProxyDev *dev, Error **errp); > + > + char *command; > +} PCIProxyDevClass; > + > +int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp); Does this function need to be publicly visible? > diff --git a/remote/remote-main.c b/remote/remote-main.c > index 7689b57..6c2eb91 100644 > --- a/remote/remote-main.c > +++ b/remote/remote-main.c > @@ -50,6 +50,32 @@ > static MPQemuLinkState *mpqemu_link; > PCIDevice *remote_pci_dev; > > +static void process_config_write(MPQemuMsg *msg) > +{ > + struct conf_data_msg *conf = (struct conf_data_msg *)msg->data2; > + > + qemu_mutex_lock_iothread(); > + pci_default_write_config(remote_pci_dev, conf->addr, conf->val, conf->l); > + qemu_mutex_unlock_iothread(); > +} > + > +static void process_config_read(MPQemuMsg *msg) > +{ > + struct conf_data_msg *conf = (struct conf_data_msg *)msg->data2; > + uint32_t val; > + int wait; > + > + wait = msg->fds[0]; > + > + qemu_mutex_lock_iothread(); > + val = pci_default_read_config(remote_pci_dev, conf->addr, conf->l); > + qemu_mutex_unlock_iothread(); > + > + notify_proxy(wait, val); > + > + PUT_REMOTE_WAIT(wait); > +} Input validation is missing in these message handler functions. I won't look out for this in patches that follow anymore. All message handler functions need to be audited. 
They must check the message size before accessing fields, verify that fds[0] was indeed passed, etc.
diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 4e28053..e016100 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -44,6 +44,8 @@ endif common-obj-y += $(devices-dirs-y) obj-y += $(devices-dirs-y) +common-obj-$(CONFIG_MPQEMU) += proxy/ + remote-pci-obj-$(CONFIG_MPQEMU) += core/ remote-pci-obj-$(CONFIG_MPQEMU) += block/ remote-pci-obj-$(CONFIG_MPQEMU) += pci/ diff --git a/hw/proxy/Makefile.objs b/hw/proxy/Makefile.objs new file mode 100644 index 0000000..eb81624 --- /dev/null +++ b/hw/proxy/Makefile.objs @@ -0,0 +1 @@ +common-obj-$(CONFIG_MPQEMU) += qemu-proxy.o diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c new file mode 100644 index 0000000..baba4da --- /dev/null +++ b/hw/proxy/qemu-proxy.c @@ -0,0 +1,247 @@ +/* + * Copyright 2019, Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "io/mpqemu-link.h" +#include "exec/memory.h" +#include "exec/cpu-common.h" +#include "exec/address-spaces.h" +#include "qemu/int128.h" +#include "qemu/range.h" +#include "hw/pci/pci.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qstring.h" +#include "sysemu/sysemu.h" +#include "hw/proxy/qemu-proxy.h" + +static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp); + +int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp) +{ + char *args[3]; + pid_t rpid; + int fd[2] = {-1, -1}; + Error *local_error = NULL; + + if (pdev->managed) { + /* Child is forked by external program (such as libvirt). */ + return -1; + } + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd)) { + error_setg(errp, "Unable to create unix socket."); + return -1; + } + /* TODO: Restrict the forked process' permissions and capabilities. 
*/ + rpid = qemu_fork(&local_error); + + if (rpid == -1) { + error_setg(errp, "Unable to spawn emulation program."); + close(fd[0]); + close(fd[1]); + return -1; + } + + if (rpid == 0) { + close(fd[0]); + + args[0] = g_strdup(command); + args[1] = g_strdup_printf("%d", fd[1]); + args[2] = NULL; + execvp(args[0], (char *const *)args); + exit(1); + } + pdev->remote_pid = rpid; + pdev->rsocket = fd[0]; + + close(fd[1]); + + return 0; +} + +static int get_proxy_sock(PCIDevice *dev) +{ + PCIProxyDev *pdev; + + pdev = PCI_PROXY_DEV(dev); + + return pdev->rsocket; +} + +static void set_proxy_sock(PCIDevice *dev, int socket) +{ + PCIProxyDev *pdev; + + pdev = PCI_PROXY_DEV(dev); + + pdev->rsocket = socket; + pdev->managed = true; + +} + +static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l, + unsigned int op) +{ + MPQemuMsg msg; + struct conf_data_msg conf_data; + int wait; + + memset(&msg, 0, sizeof(MPQemuMsg)); + conf_data.addr = addr; + conf_data.val = (op == CONF_WRITE) ? 
*val : 0; + conf_data.l = l; + + msg.data2 = (uint8_t *)malloc(sizeof(conf_data)); + if (!msg.data2) { + return -ENOMEM; + } + + memcpy(msg.data2, (const uint8_t *)&conf_data, sizeof(conf_data)); + msg.size = sizeof(conf_data); + msg.cmd = op; + msg.bytestream = 1; + + if (op == CONF_WRITE) { + msg.num_fds = 0; + } else { + wait = GET_REMOTE_WAIT; + msg.num_fds = 1; + msg.fds[0] = wait; + } + + mpqemu_msg_send(dev->mpqemu_link, &msg, dev->mpqemu_link->com); + + if (op == CONF_READ) { + *val = (uint32_t)wait_for_remote(wait); + PUT_REMOTE_WAIT(wait); + } + + free(msg.data2); + + return 0; +} + +static uint32_t pci_proxy_read_config(PCIDevice *d, uint32_t addr, int len) +{ + uint32_t val; + + (void)pci_default_read_config(d, addr, len); + + config_op_send(PCI_PROXY_DEV(d), addr, &val, len, CONF_READ); + + return val; +} + +static void pci_proxy_write_config(PCIDevice *d, uint32_t addr, uint32_t val, + int l) +{ + pci_default_write_config(d, addr, val, l); + + config_op_send(PCI_PROXY_DEV(d), addr, &val, l, CONF_WRITE); +} + +static void pci_proxy_dev_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pci_proxy_dev_realize; + k->config_read = pci_proxy_read_config; + k->config_write = pci_proxy_write_config; +} + +static const TypeInfo pci_proxy_dev_type_info = { + .name = TYPE_PCI_PROXY_DEV, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIProxyDev), + .abstract = true, + .class_size = sizeof(PCIProxyDevClass), + .class_init = pci_proxy_dev_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static void pci_proxy_dev_register_types(void) +{ + type_register_static(&pci_proxy_dev_type_info); +} + +type_init(pci_proxy_dev_register_types) + +static void init_proxy(PCIDevice *dev, char *command, Error **errp) +{ + PCIProxyDev *pdev = PCI_PROXY_DEV(dev); + Error *local_error = NULL; + + if (!pdev->managed) { + if (command) { + remote_spawn(pdev, 
command, &local_error); + } else { + return; + } + } else { + pdev->remote_pid = atoi(pdev->rid); + if (pdev->remote_pid == -1) { + error_setg(errp, "Remote PID is -1"); + return; + } + } + + pdev->mpqemu_link = mpqemu_link_create(); + + if (!pdev->mpqemu_link) { + error_setg(errp, "Failed to create proxy link"); + return; + } + + mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com, + pdev->socket); +} + +static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) +{ + PCIProxyDev *dev = PCI_PROXY_DEV(device); + PCIProxyDevClass *k = PCI_PROXY_DEV_GET_CLASS(dev); + Error *local_err = NULL; + + if (k->realize) { + k->realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + } + } + + dev->set_proxy_sock = set_proxy_sock; + dev->get_proxy_sock = get_proxy_sock; + dev->init_proxy = init_proxy; +} diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h new file mode 100644 index 0000000..3648a77 --- /dev/null +++ b/include/hw/proxy/qemu-proxy.h @@ -0,0 +1,81 @@ +/* + * Copyright 2019, Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_PROXY_H +#define QEMU_PROXY_H + +#include "io/mpqemu-link.h" + +#define TYPE_PCI_PROXY_DEV "pci-proxy-dev" + +#define PCI_PROXY_DEV(obj) \ + OBJECT_CHECK(PCIProxyDev, (obj), TYPE_PCI_PROXY_DEV) + +#define PCI_PROXY_DEV_CLASS(klass) \ + OBJECT_CLASS_CHECK(PCIProxyDevClass, (klass), TYPE_PCI_PROXY_DEV) + +#define PCI_PROXY_DEV_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCIProxyDevClass, (obj), TYPE_PCI_PROXY_DEV) + +typedef struct PCIProxyDev { + PCIDevice parent_dev; + + int n_mr_sections; + MemoryRegionSection *mr_sections; + + MPQemuLinkState *mpqemu_link; + + EventNotifier intr; + EventNotifier resample; + + pid_t remote_pid; + int rsocket; + int socket; + + char *rid; + + bool managed; + char *dev_id; + + QLIST_ENTRY(PCIProxyDev) next; + + void (*set_proxy_sock) (PCIDevice *dev, int socket); + int (*get_proxy_sock) (PCIDevice *dev); + + void (*set_remote_opts) (PCIDevice *dev, QDict *qdict, unsigned int cmd); + void (*proxy_ready) (PCIDevice *dev); + void (*init_proxy) (PCIDevice *pdev, char *command, Error **errp); + +} PCIProxyDev; + +typedef struct PCIProxyDevClass { + PCIDeviceClass parent_class; + + void (*realize)(PCIProxyDev *dev, Error **errp); + + char *command; +} PCIProxyDevClass; + +int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp); + + +#endif /* QEMU_PROXY_H */ diff --git a/remote/remote-main.c b/remote/remote-main.c index 7689b57..6c2eb91 100644 --- a/remote/remote-main.c +++ b/remote/remote-main.c @@ -50,6 +50,32 @@ static MPQemuLinkState *mpqemu_link; PCIDevice *remote_pci_dev; +static void process_config_write(MPQemuMsg *msg) +{ + struct conf_data_msg *conf = (struct conf_data_msg *)msg->data2; + + qemu_mutex_lock_iothread(); + 
pci_default_write_config(remote_pci_dev, conf->addr, conf->val, conf->l); + qemu_mutex_unlock_iothread(); +} + +static void process_config_read(MPQemuMsg *msg) +{ + struct conf_data_msg *conf = (struct conf_data_msg *)msg->data2; + uint32_t val; + int wait; + + wait = msg->fds[0]; + + qemu_mutex_lock_iothread(); + val = pci_default_read_config(remote_pci_dev, conf->addr, conf->l); + qemu_mutex_unlock_iothread(); + + notify_proxy(wait, val); + + PUT_REMOTE_WAIT(wait); +} + static void process_msg(GIOCondition cond, MPQemuChannel *chan) { MPQemuMsg *msg = NULL; @@ -71,8 +97,10 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan) case INIT: break; case CONF_WRITE: + process_config_write(msg); break; case CONF_READ: + process_config_read(msg); break; default: error_setg(&err, "Unknown command");