
[v2,kvmtool,25/30] pci: Implement callbacks for toggling BAR emulation

Message ID 20200123134805.1993-26-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Series Add reassignable BARs and PCIE 1.1 support

Commit Message

Alexandru Elisei Jan. 23, 2020, 1:48 p.m. UTC
Implement callbacks for activating and deactivating emulation for a BAR
region. This is in preparation for allowing a guest operating system to
enable and disable access to I/O or memory space, or to reassign the
BARs.

The emulated vesa device has been refactored in the process and the static
variables were removed in order to make using the callbacks less painful.
The framebuffer isn't designed to allow stopping and restarting at
arbitrary points in the guest execution. Furthermore, on x86, the kernel
will not change the BAR addresses, which on bare metal are programmed by
the firmware, so take the easy way out and refuse to deactivate emulation
for the BAR regions.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
 include/kvm/pci.h |  19 +++++++-
 pci.c             |  44 +++++++++++++++++
 vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
 virtio/pci.c      |  90 ++++++++++++++++++++++++----------
 5 files changed, 294 insertions(+), 79 deletions(-)
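
For orientation, the flow a device goes through with the new interface is: fill in bar[] and bar_size[] in its struct pci_device_header, then call pci__register_bar_regions() with an activate and a deactivate callback; the helper stores the callbacks and immediately invokes the activate callback for every implemented BAR whose address space is enabled in the command register. A minimal sketch using the callback types added in include/kvm/pci.h (the foo_dev device and the foo__map()/foo__unmap() helpers are hypothetical, for illustration only):

	static int foo__bar_activate(struct kvm *kvm,
				     struct pci_device_header *pci_hdr,
				     int bar_num, void *data)
	{
		struct foo_dev *fdev = data;
		u32 bar_addr = pci__bar_address(pci_hdr, bar_num);
		u32 bar_size = pci_hdr->bar_size[bar_num];

		/* Set up ioport/MMIO emulation for this BAR. */
		return foo__map(kvm, fdev, bar_addr, bar_size);
	}

	static int foo__bar_deactivate(struct kvm *kvm,
				       struct pci_device_header *pci_hdr,
				       int bar_num, void *data)
	{
		/* Undo whatever the activate callback set up. */
		return foo__unmap(kvm, data, pci__bar_address(pci_hdr, bar_num));
	}

	/* At device init, after the BARs have been configured: */
	r = pci__register_bar_regions(kvm, &fdev->pci_hdr, foo__bar_activate,
				      foo__bar_deactivate, fdev);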

Comments

Andre Przywara Feb. 6, 2020, 6:21 p.m. UTC | #1
On Thu, 23 Jan 2020 13:48:00 +0000
Alexandru Elisei <alexandru.elisei@arm.com> wrote:

Hi,

> Implement callbacks for activating and deactivating emulation for a BAR
> region. This is in preparation for allowing a guest operating system to
> enable and disable access to I/O or memory space, or to reassign the
> BARs.
> 
> The emulated vesa device has been refactored in the process and the static
> variables were removed in order to make using the callbacks less painful.
> The framebuffer isn't designed to allow stopping and restarting at
> arbitrary points in the guest execution. Furthermore, on x86, the kernel
> will not change the BAR addresses, which on bare metal are programmed by
> the firmware, so take the easy way out and refuse to deactivate emulation
> for the BAR regions.
> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>  hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
>  include/kvm/pci.h |  19 +++++++-
>  pci.c             |  44 +++++++++++++++++
>  vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
>  virtio/pci.c      |  90 ++++++++++++++++++++++++----------
>  5 files changed, 294 insertions(+), 79 deletions(-)
> 
> diff --git a/hw/vesa.c b/hw/vesa.c
> index e988c0425946..74ebebbefa6b 100644
> --- a/hw/vesa.c
> +++ b/hw/vesa.c
> @@ -18,6 +18,12 @@
>  #include <inttypes.h>
>  #include <unistd.h>
>  
> +struct vesa_dev {
> +	struct pci_device_header	pci_hdr;
> +	struct device_header		dev_hdr;
> +	struct framebuffer		fb;
> +};
> +
>  static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
>  {
>  	return true;
> @@ -33,29 +39,52 @@ static struct ioport_operations vesa_io_ops = {
>  	.io_out			= vesa_pci_io_out,
>  };
>  
> -static struct pci_device_header vesa_pci_device = {
> -	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> -	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
> -	.header_type		= PCI_HEADER_TYPE_NORMAL,
> -	.revision_id		= 0,
> -	.class[2]		= 0x03,
> -	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
> -	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
> -	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
> -	.bar_size[1]		= VESA_MEM_SIZE,
> -};
> +static int vesa__bar_activate(struct kvm *kvm,
> +			      struct pci_device_header *pci_hdr,
> +			      int bar_num, void *data)
> +{
> +	struct vesa_dev *vdev = data;
> +	u32 bar_addr, bar_size;
> +	char *mem;
> +	int r;
>  
> -static struct device_header vesa_device = {
> -	.bus_type	= DEVICE_BUS_PCI,
> -	.data		= &vesa_pci_device,
> -};
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +	bar_size = pci_hdr->bar_size[bar_num];
>  
> -static struct framebuffer vesafb;
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
> +				     NULL);
> +		break;
> +	case 1:
> +		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> +		if (mem == MAP_FAILED) {
> +			r = -errno;
> +			break;
> +		}
> +		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
> +		if (r < 0)
> +			break;
> +		vdev->fb.mem = mem;
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +static int vesa__bar_deactivate(struct kvm *kvm,
> +				struct pci_device_header *pci_hdr,
> +				int bar_num, void *data)
> +{
> +	return -EINVAL;
> +}
>  
>  struct framebuffer *vesa__init(struct kvm *kvm)
>  {
> -	u16 vesa_base_addr;
> -	char *mem;
> +	struct vesa_dev *vdev;
> +	u16 port_addr;
>  	int r;
>  
>  	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
> @@ -63,34 +92,51 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>  
>  	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>  		return NULL;
> -	r = pci_get_io_port_block(PCI_IO_SIZE);
> -	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
> -	if (r < 0)
> -		return ERR_PTR(r);
>  
> -	vesa_base_addr			= (u16)r;
> -	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
> -	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
> -	r = device__register(&vesa_device);
> -	if (r < 0)
> -		return ERR_PTR(r);
> +	vdev = calloc(1, sizeof(*vdev));
> +	if (vdev == NULL)
> +		return ERR_PTR(-ENOMEM);

Is it really necessary to allocate this here? You never free this, and I don't see how you could actually do this. AFAICS conceptually there can be only one VESA device? So maybe have a static variable above and use that instead of passing the pointer around? Or use &vdev if you need a pointer argument for the callbacks.
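
Roughly, the static-variable alternative would look like this (untested sketch; the rest of vesa__init() stays as in the patch, minus the calloc()/ENOMEM handling):

	static struct vesa_dev vesa_dev;

	struct framebuffer *vesa__init(struct kvm *kvm)
	{
		struct vesa_dev *vdev = &vesa_dev;

		/* pci_hdr/fb setup and registration unchanged from the patch */
	}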

>  
> -	mem = mmap(NULL, VESA_MEM_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
> -	if (mem == MAP_FAILED)
> -		return ERR_PTR(-errno);
> +	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>  
> -	r = kvm__register_dev_mem(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, mem);
> -	if (r < 0)
> -		return ERR_PTR(r);
> +	vdev->pci_hdr = (struct pci_device_header) {
> +		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> +		.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
> +		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
> +		.header_type		= PCI_HEADER_TYPE_NORMAL,
> +		.revision_id		= 0,
> +		.class[2]		= 0x03,
> +		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
> +		.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
> +		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
> +		.bar_size[0]		= PCI_IO_SIZE,
> +		.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
> +		.bar_size[1]		= VESA_MEM_SIZE,
> +	};
>  
> -	vesafb = (struct framebuffer) {
> +	vdev->fb = (struct framebuffer) {
>  		.width			= VESA_WIDTH,
>  		.height			= VESA_HEIGHT,
>  		.depth			= VESA_BPP,
> -		.mem			= mem,
> +		.mem			= NULL,
>  		.mem_addr		= VESA_MEM_ADDR,
>  		.mem_size		= VESA_MEM_SIZE,
>  		.kvm			= kvm,
>  	};
> -	return fb__register(&vesafb);
> +
> +	r = pci__register_bar_regions(kvm, &vdev->pci_hdr, vesa__bar_activate,
> +				      vesa__bar_deactivate, vdev);
> +	if (r < 0)
> +		return ERR_PTR(r);
> +
> +	vdev->dev_hdr = (struct device_header) {
> +		.bus_type       = DEVICE_BUS_PCI,
> +		.data           = &vdev->pci_hdr,
> +	};
> +
> +	r = device__register(&vdev->dev_hdr);
> +	if (r < 0)
> +		return ERR_PTR(r);
> +
> +	return fb__register(&vdev->fb);
>  }
> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
> index 235cd82fff3c..bf42f497168f 100644
> --- a/include/kvm/pci.h
> +++ b/include/kvm/pci.h
> @@ -89,12 +89,19 @@ struct pci_cap_hdr {
>  	u8	next;
>  };
>  
> +struct pci_device_header;
> +
> +typedef int (*bar_activate_fn_t)(struct kvm *kvm,
> +				 struct pci_device_header *pci_hdr,
> +				 int bar_num, void *data);
> +typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
> +				   struct pci_device_header *pci_hdr,
> +				   int bar_num, void *data);
> +
>  #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
>  #define PCI_DEV_CFG_SIZE	256
>  #define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>  
> -struct pci_device_header;
> -
>  struct pci_config_operations {
>  	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>  		      u8 offset, void *data, int sz);
> @@ -136,6 +143,9 @@ struct pci_device_header {
>  
>  	/* Private to lkvm */
>  	u32		bar_size[6];
> +	bar_activate_fn_t	bar_activate_fn;
> +	bar_deactivate_fn_t	bar_deactivate_fn;
> +	void *data;
>  	struct pci_config_operations	cfg_ops;
>  	/*
>  	 * PCI INTx# are level-triggered, but virtual device often feature
> @@ -160,8 +170,13 @@ void pci__assign_irq(struct device_header *dev_hdr);
>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>  
> +

Stray empty line?

Cheers,
Andre

>  void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type);
>  
> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
> +			      bar_activate_fn_t bar_activate_fn,
> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data);
> +
>  static inline bool __pci__memory_space_enabled(u16 command)
>  {
>  	return command & PCI_COMMAND_MEMORY;
> diff --git a/pci.c b/pci.c
> index 4f7b863298f6..5412f2defa2e 100644
> --- a/pci.c
> +++ b/pci.c
> @@ -66,6 +66,11 @@ void pci__assign_irq(struct device_header *dev_hdr)
>  		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;
>  }
>  
> +static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
> +{
> +	return  bar_num < 6 && pci_hdr->bar_size[bar_num];
> +}
> +
>  static void *pci_config_address_ptr(u16 port)
>  {
>  	unsigned long offset;
> @@ -264,6 +269,45 @@ struct pci_device_header *pci__find_dev(u8 dev_num)
>  	return hdr->data;
>  }
>  
> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
> +			      bar_activate_fn_t bar_activate_fn,
> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
> +{
> +	int i, r;
> +	bool has_bar_regions = false;
> +
> +	assert(bar_activate_fn && bar_deactivate_fn);
> +
> +	pci_hdr->bar_activate_fn = bar_activate_fn;
> +	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
> +	pci_hdr->data = data;
> +
> +	for (i = 0; i < 6; i++) {
> +		if (!pci_bar_is_implemented(pci_hdr, i))
> +			continue;
> +
> +		has_bar_regions = true;
> +
> +		if (pci__bar_is_io(pci_hdr, i) &&
> +		    pci__io_space_enabled(pci_hdr)) {
> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
> +				if (r < 0)
> +					return r;
> +			}
> +
> +		if (pci__bar_is_memory(pci_hdr, i) &&
> +		    pci__memory_space_enabled(pci_hdr)) {
> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
> +				if (r < 0)
> +					return r;
> +			}
> +	}
> +
> +	assert(has_bar_regions);
> +
> +	return 0;
> +}
> +
>  int pci__init(struct kvm *kvm)
>  {
>  	int r;
> diff --git a/vfio/pci.c b/vfio/pci.c
> index 8a775a4a4a54..9e595562180b 100644
> --- a/vfio/pci.c
> +++ b/vfio/pci.c
> @@ -446,6 +446,83 @@ out_unlock:
>  	mutex_unlock(&pdev->msi.mutex);
>  }
>  
> +static int vfio_pci_bar_activate(struct kvm *kvm,
> +				 struct pci_device_header *pci_hdr,
> +				 int bar_num, void *data)
> +{
> +	struct vfio_device *vdev = data;
> +	struct vfio_pci_device *pdev = &vdev->pci;
> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
> +	struct vfio_region *region = &vdev->regions[bar_num];
> +	int ret;
> +
> +	if (!region->info.size) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == table->bar) {
> +		ret = kvm__register_mmio(kvm, table->guest_phys_addr,
> +					 table->size, false,
> +					 vfio_pci_msix_table_access, pdev);
> +		if (ret < 0 || table->bar!= pba->bar)
> +			goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == pba->bar) {
> +		ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
> +					 pba->size, false,
> +					 vfio_pci_msix_pba_access, pdev);
> +		goto out;
> +	}
> +
> +	ret = vfio_map_region(kvm, vdev, region);
> +out:
> +	return ret;
> +}
> +
> +static int vfio_pci_bar_deactivate(struct kvm *kvm,
> +				   struct pci_device_header *pci_hdr,
> +				   int bar_num, void *data)
> +{
> +	struct vfio_device *vdev = data;
> +	struct vfio_pci_device *pdev = &vdev->pci;
> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
> +	struct vfio_region *region = &vdev->regions[bar_num];
> +	int ret;
> +	bool success;
> +
> +	if (!region->info.size) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == table->bar) {
> +		success = kvm__deregister_mmio(kvm, table->guest_phys_addr);
> +		ret = (success ? 0 : -EINVAL);
> +		if (ret < 0 || table->bar!= pba->bar)
> +			goto out;
> +	}
> +
> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
> +	    (u32)bar_num == pba->bar) {
> +		success = kvm__deregister_mmio(kvm, pba->guest_phys_addr);
> +		ret = (success ? 0 : -EINVAL);
> +		goto out;
> +	}
> +
> +	vfio_unmap_region(kvm, region);
> +	ret = 0;
> +
> +out:
> +	return ret;
> +}
> +
>  static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
>  			      u8 offset, void *data, int sz)
>  {
> @@ -804,12 +881,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>  		ret = -ENOMEM;
>  		goto out_free;
>  	}
> -	pba->guest_phys_addr = table->guest_phys_addr + table->size;
> -
> -	ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
> -				 false, vfio_pci_msix_table_access, pdev);
> -	if (ret < 0)
> -		goto out_free;
>  
>  	/*
>  	 * We could map the physical PBA directly into the guest, but it's
> @@ -819,10 +890,7 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>  	 * between MSI-X table and PBA. For the sake of isolation, create a
>  	 * virtual PBA.
>  	 */
> -	ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
> -				 vfio_pci_msix_pba_access, pdev);
> -	if (ret < 0)
> -		goto out_free;
> +	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>  
>  	pdev->msix.entries = entries;
>  	pdev->msix.nr_entries = nr_entries;
> @@ -893,11 +961,6 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
>  		region->guest_phys_addr = pci_get_mmio_block(map_size);
>  	}
>  
> -	/* Map the BARs into the guest or setup a trap region. */
> -	ret = vfio_map_region(kvm, vdev, region);
> -	if (ret)
> -		return ret;
> -
>  	return 0;
>  }
>  
> @@ -944,7 +1007,12 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
>  	}
>  
>  	/* We've configured the BARs, fake up a Configuration Space */
> -	return vfio_pci_fixup_cfg_space(vdev);
> +	ret = vfio_pci_fixup_cfg_space(vdev);
> +	if (ret)
> +		return ret;
> +
> +	return pci__register_bar_regions(kvm, &pdev->hdr, vfio_pci_bar_activate,
> +					 vfio_pci_bar_deactivate, vdev);
>  }
>  
>  /*
> diff --git a/virtio/pci.c b/virtio/pci.c
> index c4822514856c..5a3cc6f1e943 100644
> --- a/virtio/pci.c
> +++ b/virtio/pci.c
> @@ -474,6 +474,65 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
>  		virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len);
>  }
>  
> +static int virtio_pci__bar_activate(struct kvm *kvm,
> +				    struct pci_device_header *pci_hdr,
> +				    int bar_num, void *data)
> +{
> +	struct virtio_device *vdev = data;
> +	u32 bar_addr, bar_size;
> +	int r;
> +
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +	bar_size = pci_hdr->bar_size[bar_num];
> +
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__register(kvm, bar_addr, &virtio_pci__io_ops,
> +				     bar_size, vdev);
> +		if (r > 0)
> +			r = 0;
> +		break;
> +	case 1:
> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
> +					virtio_pci__io_mmio_callback, vdev);
> +		break;
> +	case 2:
> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
> +					virtio_pci__msix_mmio_callback, vdev);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +static int virtio_pci__bar_deactivate(struct kvm *kvm,
> +				      struct pci_device_header *pci_hdr,
> +				      int bar_num, void *data)
> +{
> +	u32 bar_addr;
> +	bool success;
> +	int r;
> +
> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
> +
> +	switch (bar_num) {
> +	case 0:
> +		r = ioport__unregister(kvm, bar_addr);
> +		break;
> +	case 1:
> +	case 2:
> +		success = kvm__deregister_mmio(kvm, bar_addr);
> +		r = (success ? 0 : -EINVAL);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
>  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  		     int device_id, int subsys_id, int class)
>  {
> @@ -488,23 +547,8 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE));
>  
>  	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
> -	r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE,
> -			     vdev);
> -	if (r < 0)
> -		return r;
> -	port_addr = (u16)r;
> -
>  	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE);
> -	r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false,
> -			       virtio_pci__io_mmio_callback, vdev);
> -	if (r < 0)
> -		goto free_ioport;
> -
>  	msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2);
> -	r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false,
> -			       virtio_pci__msix_mmio_callback, vdev);
> -	if (r < 0)
> -		goto free_mmio;
>  
>  	vpci->pci_hdr = (struct pci_device_header) {
>  		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
> @@ -530,6 +574,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
>  	};
>  
> +	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
> +				      virtio_pci__bar_activate,
> +				      virtio_pci__bar_deactivate, vdev);
> +	if (r < 0)
> +		return r;
> +
>  	vpci->dev_hdr = (struct device_header) {
>  		.bus_type		= DEVICE_BUS_PCI,
>  		.data			= &vpci->pci_hdr,
> @@ -560,20 +610,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>  
>  	r = device__register(&vpci->dev_hdr);
>  	if (r < 0)
> -		goto free_msix_mmio;
> +		return r;
>  
>  	/* save the IRQ that device__register() has allocated */
>  	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;
>  
>  	return 0;
> -
> -free_msix_mmio:
> -	kvm__deregister_mmio(kvm, msix_io_block);
> -free_mmio:
> -	kvm__deregister_mmio(kvm, mmio_addr);
> -free_ioport:
> -	ioport__unregister(kvm, port_addr);
> -	return r;
>  }
>  
>  int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)

Alexandru Elisei Feb. 7, 2020, 10:12 a.m. UTC | #2
Hi,

On 2/6/20 6:21 PM, Andre Przywara wrote:
> On Thu, 23 Jan 2020 13:48:00 +0000
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
>
> Hi,
>
>> Implement callbacks for activating and deactivating emulation for a BAR
>> region. This is in preparation for allowing a guest operating system to
>> enable and disable access to I/O or memory space, or to reassign the
>> BARs.
>>
>> The emulated vesa device has been refactored in the process and the static
>> variables were removed in order to make using the callbacks less painful.
>> The framebuffer isn't designed to allow stopping and restarting at
>> arbitrary points in the guest execution. Furthermore, on x86, the kernel
>> will not change the BAR addresses, which on bare metal are programmed by
>> the firmware, so take the easy way out and refuse to deactivate emulation
>> for the BAR regions.
>>
>> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
>> ---
>>  hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
>>  include/kvm/pci.h |  19 +++++++-
>>  pci.c             |  44 +++++++++++++++++
>>  vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
>>  virtio/pci.c      |  90 ++++++++++++++++++++++++----------
>>  5 files changed, 294 insertions(+), 79 deletions(-)
>>
>> diff --git a/hw/vesa.c b/hw/vesa.c
>> index e988c0425946..74ebebbefa6b 100644
>> --- a/hw/vesa.c
>> +++ b/hw/vesa.c
>> @@ -18,6 +18,12 @@
>>  #include <inttypes.h>
>>  #include <unistd.h>
>>  
>> +struct vesa_dev {
>> +	struct pci_device_header	pci_hdr;
>> +	struct device_header		dev_hdr;
>> +	struct framebuffer		fb;
>> +};
>> +
>>  static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
>>  {
>>  	return true;
>> @@ -33,29 +39,52 @@ static struct ioport_operations vesa_io_ops = {
>>  	.io_out			= vesa_pci_io_out,
>>  };
>>  
>> -static struct pci_device_header vesa_pci_device = {
>> -	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>> -	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
>> -	.header_type		= PCI_HEADER_TYPE_NORMAL,
>> -	.revision_id		= 0,
>> -	.class[2]		= 0x03,
>> -	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
>> -	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
>> -	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
>> -	.bar_size[1]		= VESA_MEM_SIZE,
>> -};
>> +static int vesa__bar_activate(struct kvm *kvm,
>> +			      struct pci_device_header *pci_hdr,
>> +			      int bar_num, void *data)
>> +{
>> +	struct vesa_dev *vdev = data;
>> +	u32 bar_addr, bar_size;
>> +	char *mem;
>> +	int r;
>>  
>> -static struct device_header vesa_device = {
>> -	.bus_type	= DEVICE_BUS_PCI,
>> -	.data		= &vesa_pci_device,
>> -};
>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>> +	bar_size = pci_hdr->bar_size[bar_num];
>>  
>> -static struct framebuffer vesafb;
>> +	switch (bar_num) {
>> +	case 0:
>> +		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
>> +				     NULL);
>> +		break;
>> +	case 1:
>> +		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>> +		if (mem == MAP_FAILED) {
>> +			r = -errno;
>> +			break;
>> +		}
>> +		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
>> +		if (r < 0)
>> +			break;
>> +		vdev->fb.mem = mem;
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +static int vesa__bar_deactivate(struct kvm *kvm,
>> +				struct pci_device_header *pci_hdr,
>> +				int bar_num, void *data)
>> +{
>> +	return -EINVAL;
>> +}
>>  
>>  struct framebuffer *vesa__init(struct kvm *kvm)
>>  {
>> -	u16 vesa_base_addr;
>> -	char *mem;
>> +	struct vesa_dev *vdev;
>> +	u16 port_addr;
>>  	int r;
>>  
>>  	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
>> @@ -63,34 +92,51 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>>  
>>  	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>>  		return NULL;
>> -	r = pci_get_io_port_block(PCI_IO_SIZE);
>> -	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
>> -	if (r < 0)
>> -		return ERR_PTR(r);
>>  
>> -	vesa_base_addr			= (u16)r;
>> -	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
>> -	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
>> -	r = device__register(&vesa_device);
>> -	if (r < 0)
>> -		return ERR_PTR(r);
>> +	vdev = calloc(1, sizeof(*vdev));
>> +	if (vdev == NULL)
>> +		return ERR_PTR(-ENOMEM);
> Is it really necessary to allocate this here? You never free this, and I don't see how you could actually do this. AFAICS conceptually there can be only one VESA device? So maybe have a static variable above and use that instead of passing the pointer around? Or use &vdev if you need a pointer argument for the callbacks.

As far as I can tell, there can be only one VESA device, yes. I was following the
same pattern from virtio/{net,blk,rng,scsi,9p}.c, which I prefer because it's
explicit what function can access the device. What's wrong with passing the
pointer around? The entire PCI emulation code works like that.

>
>>  
>> -	mem = mmap(NULL, VESA_MEM_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>> -	if (mem == MAP_FAILED)
>> -		return ERR_PTR(-errno);
>> +	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>>  
>> -	r = kvm__register_dev_mem(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, mem);
>> -	if (r < 0)
>> -		return ERR_PTR(r);
>> +	vdev->pci_hdr = (struct pci_device_header) {
>> +		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>> +		.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
>> +		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
>> +		.header_type		= PCI_HEADER_TYPE_NORMAL,
>> +		.revision_id		= 0,
>> +		.class[2]		= 0x03,
>> +		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
>> +		.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
>> +		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
>> +		.bar_size[0]		= PCI_IO_SIZE,
>> +		.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
>> +		.bar_size[1]		= VESA_MEM_SIZE,
>> +	};
>>  
>> -	vesafb = (struct framebuffer) {
>> +	vdev->fb = (struct framebuffer) {
>>  		.width			= VESA_WIDTH,
>>  		.height			= VESA_HEIGHT,
>>  		.depth			= VESA_BPP,
>> -		.mem			= mem,
>> +		.mem			= NULL,
>>  		.mem_addr		= VESA_MEM_ADDR,
>>  		.mem_size		= VESA_MEM_SIZE,
>>  		.kvm			= kvm,
>>  	};
>> -	return fb__register(&vesafb);
>> +
>> +	r = pci__register_bar_regions(kvm, &vdev->pci_hdr, vesa__bar_activate,
>> +				      vesa__bar_deactivate, vdev);
>> +	if (r < 0)
>> +		return ERR_PTR(r);
>> +
>> +	vdev->dev_hdr = (struct device_header) {
>> +		.bus_type       = DEVICE_BUS_PCI,
>> +		.data           = &vdev->pci_hdr,
>> +	};
>> +
>> +	r = device__register(&vdev->dev_hdr);
>> +	if (r < 0)
>> +		return ERR_PTR(r);
>> +
>> +	return fb__register(&vdev->fb);
>>  }
>> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
>> index 235cd82fff3c..bf42f497168f 100644
>> --- a/include/kvm/pci.h
>> +++ b/include/kvm/pci.h
>> @@ -89,12 +89,19 @@ struct pci_cap_hdr {
>>  	u8	next;
>>  };
>>  
>> +struct pci_device_header;
>> +
>> +typedef int (*bar_activate_fn_t)(struct kvm *kvm,
>> +				 struct pci_device_header *pci_hdr,
>> +				 int bar_num, void *data);
>> +typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
>> +				   struct pci_device_header *pci_hdr,
>> +				   int bar_num, void *data);
>> +
>>  #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
>>  #define PCI_DEV_CFG_SIZE	256
>>  #define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>>  
>> -struct pci_device_header;
>> -
>>  struct pci_config_operations {
>>  	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>  		      u8 offset, void *data, int sz);
>> @@ -136,6 +143,9 @@ struct pci_device_header {
>>  
>>  	/* Private to lkvm */
>>  	u32		bar_size[6];
>> +	bar_activate_fn_t	bar_activate_fn;
>> +	bar_deactivate_fn_t	bar_deactivate_fn;
>> +	void *data;
>>  	struct pci_config_operations	cfg_ops;
>>  	/*
>>  	 * PCI INTx# are level-triggered, but virtual device often feature
>> @@ -160,8 +170,13 @@ void pci__assign_irq(struct device_header *dev_hdr);
>>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>>  
>> +
> Stray empty line?

Indeed, will get rid of it.

Thanks,
Alex
>
> Cheers,
> Andre
>
>>  void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type);
>>  
>> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
>> +			      bar_activate_fn_t bar_activate_fn,
>> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data);
>> +
>>  static inline bool __pci__memory_space_enabled(u16 command)
>>  {
>>  	return command & PCI_COMMAND_MEMORY;
>> diff --git a/pci.c b/pci.c
>> index 4f7b863298f6..5412f2defa2e 100644
>> --- a/pci.c
>> +++ b/pci.c
>> @@ -66,6 +66,11 @@ void pci__assign_irq(struct device_header *dev_hdr)
>>  		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;
>>  }
>>  
>> +static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
>> +{
>> +	return  bar_num < 6 && pci_hdr->bar_size[bar_num];
>> +}
>> +
>>  static void *pci_config_address_ptr(u16 port)
>>  {
>>  	unsigned long offset;
>> @@ -264,6 +269,45 @@ struct pci_device_header *pci__find_dev(u8 dev_num)
>>  	return hdr->data;
>>  }
>>  
>> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
>> +			      bar_activate_fn_t bar_activate_fn,
>> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
>> +{
>> +	int i, r;
>> +	bool has_bar_regions = false;
>> +
>> +	assert(bar_activate_fn && bar_deactivate_fn);
>> +
>> +	pci_hdr->bar_activate_fn = bar_activate_fn;
>> +	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
>> +	pci_hdr->data = data;
>> +
>> +	for (i = 0; i < 6; i++) {
>> +		if (!pci_bar_is_implemented(pci_hdr, i))
>> +			continue;
>> +
>> +		has_bar_regions = true;
>> +
>> +		if (pci__bar_is_io(pci_hdr, i) &&
>> +		    pci__io_space_enabled(pci_hdr)) {
>> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
>> +				if (r < 0)
>> +					return r;
>> +			}
>> +
>> +		if (pci__bar_is_memory(pci_hdr, i) &&
>> +		    pci__memory_space_enabled(pci_hdr)) {
>> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
>> +				if (r < 0)
>> +					return r;
>> +			}
>> +	}
>> +
>> +	assert(has_bar_regions);
>> +
>> +	return 0;
>> +}
>> +
>>  int pci__init(struct kvm *kvm)
>>  {
>>  	int r;
>> diff --git a/vfio/pci.c b/vfio/pci.c
>> index 8a775a4a4a54..9e595562180b 100644
>> --- a/vfio/pci.c
>> +++ b/vfio/pci.c
>> @@ -446,6 +446,83 @@ out_unlock:
>>  	mutex_unlock(&pdev->msi.mutex);
>>  }
>>  
>> +static int vfio_pci_bar_activate(struct kvm *kvm,
>> +				 struct pci_device_header *pci_hdr,
>> +				 int bar_num, void *data)
>> +{
>> +	struct vfio_device *vdev = data;
>> +	struct vfio_pci_device *pdev = &vdev->pci;
>> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
>> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
>> +	struct vfio_region *region = &vdev->regions[bar_num];
>> +	int ret;
>> +
>> +	if (!region->info.size) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>> +	    (u32)bar_num == table->bar) {
>> +		ret = kvm__register_mmio(kvm, table->guest_phys_addr,
>> +					 table->size, false,
>> +					 vfio_pci_msix_table_access, pdev);
>> +		if (ret < 0 || table->bar!= pba->bar)
>> +			goto out;
>> +	}
>> +
>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>> +	    (u32)bar_num == pba->bar) {
>> +		ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
>> +					 pba->size, false,
>> +					 vfio_pci_msix_pba_access, pdev);
>> +		goto out;
>> +	}
>> +
>> +	ret = vfio_map_region(kvm, vdev, region);
>> +out:
>> +	return ret;
>> +}
>> +
>> +static int vfio_pci_bar_deactivate(struct kvm *kvm,
>> +				   struct pci_device_header *pci_hdr,
>> +				   int bar_num, void *data)
>> +{
>> +	struct vfio_device *vdev = data;
>> +	struct vfio_pci_device *pdev = &vdev->pci;
>> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
>> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
>> +	struct vfio_region *region = &vdev->regions[bar_num];
>> +	int ret;
>> +	bool success;
>> +
>> +	if (!region->info.size) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>> +	    (u32)bar_num == table->bar) {
>> +		success = kvm__deregister_mmio(kvm, table->guest_phys_addr);
>> +		ret = (success ? 0 : -EINVAL);
>> +		if (ret < 0 || table->bar!= pba->bar)
>> +			goto out;
>> +	}
>> +
>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>> +	    (u32)bar_num == pba->bar) {
>> +		success = kvm__deregister_mmio(kvm, pba->guest_phys_addr);
>> +		ret = (success ? 0 : -EINVAL);
>> +		goto out;
>> +	}
>> +
>> +	vfio_unmap_region(kvm, region);
>> +	ret = 0;
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>>  static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>  			      u8 offset, void *data, int sz)
>>  {
>> @@ -804,12 +881,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>>  		ret = -ENOMEM;
>>  		goto out_free;
>>  	}
>> -	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>> -
>> -	ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
>> -				 false, vfio_pci_msix_table_access, pdev);
>> -	if (ret < 0)
>> -		goto out_free;
>>  
>>  	/*
>>  	 * We could map the physical PBA directly into the guest, but it's
>> @@ -819,10 +890,7 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>>  	 * between MSI-X table and PBA. For the sake of isolation, create a
>>  	 * virtual PBA.
>>  	 */
>> -	ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
>> -				 vfio_pci_msix_pba_access, pdev);
>> -	if (ret < 0)
>> -		goto out_free;
>> +	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>>  
>>  	pdev->msix.entries = entries;
>>  	pdev->msix.nr_entries = nr_entries;
>> @@ -893,11 +961,6 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
>>  		region->guest_phys_addr = pci_get_mmio_block(map_size);
>>  	}
>>  
>> -	/* Map the BARs into the guest or setup a trap region. */
>> -	ret = vfio_map_region(kvm, vdev, region);
>> -	if (ret)
>> -		return ret;
>> -
>>  	return 0;
>>  }
>>  
>> @@ -944,7 +1007,12 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
>>  	}
>>  
>>  	/* We've configured the BARs, fake up a Configuration Space */
>> -	return vfio_pci_fixup_cfg_space(vdev);
>> +	ret = vfio_pci_fixup_cfg_space(vdev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return pci__register_bar_regions(kvm, &pdev->hdr, vfio_pci_bar_activate,
>> +					 vfio_pci_bar_deactivate, vdev);
>>  }
>>  
>>  /*
>> diff --git a/virtio/pci.c b/virtio/pci.c
>> index c4822514856c..5a3cc6f1e943 100644
>> --- a/virtio/pci.c
>> +++ b/virtio/pci.c
>> @@ -474,6 +474,65 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
>>  		virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len);
>>  }
>>  
>> +static int virtio_pci__bar_activate(struct kvm *kvm,
>> +				    struct pci_device_header *pci_hdr,
>> +				    int bar_num, void *data)
>> +{
>> +	struct virtio_device *vdev = data;
>> +	u32 bar_addr, bar_size;
>> +	int r;
>> +
>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>> +	bar_size = pci_hdr->bar_size[bar_num];
>> +
>> +	switch (bar_num) {
>> +	case 0:
>> +		r = ioport__register(kvm, bar_addr, &virtio_pci__io_ops,
>> +				     bar_size, vdev);
>> +		if (r > 0)
>> +			r = 0;
>> +		break;
>> +	case 1:
>> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
>> +					virtio_pci__io_mmio_callback, vdev);
>> +		break;
>> +	case 2:
>> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
>> +					virtio_pci__msix_mmio_callback, vdev);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +static int virtio_pci__bar_deactivate(struct kvm *kvm,
>> +				      struct pci_device_header *pci_hdr,
>> +				      int bar_num, void *data)
>> +{
>> +	u32 bar_addr;
>> +	bool success;
>> +	int r;
>> +
>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>> +
>> +	switch (bar_num) {
>> +	case 0:
>> +		r = ioport__unregister(kvm, bar_addr);
>> +		break;
>> +	case 1:
>> +	case 2:
>> +		success = kvm__deregister_mmio(kvm, bar_addr);
>> +		r = (success ? 0 : -EINVAL);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>>  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>  		     int device_id, int subsys_id, int class)
>>  {
>> @@ -488,23 +547,8 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>  	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE));
>>  
>>  	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>> -	r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE,
>> -			     vdev);
>> -	if (r < 0)
>> -		return r;
>> -	port_addr = (u16)r;
>> -
>>  	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE);
>> -	r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false,
>> -			       virtio_pci__io_mmio_callback, vdev);
>> -	if (r < 0)
>> -		goto free_ioport;
>> -
>>  	msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2);
>> -	r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false,
>> -			       virtio_pci__msix_mmio_callback, vdev);
>> -	if (r < 0)
>> -		goto free_mmio;
>>  
>>  	vpci->pci_hdr = (struct pci_device_header) {
>>  		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>> @@ -530,6 +574,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>  		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
>>  	};
>>  
>> +	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
>> +				      virtio_pci__bar_activate,
>> +				      virtio_pci__bar_deactivate, vdev);
>> +	if (r < 0)
>> +		return r;
>> +
>>  	vpci->dev_hdr = (struct device_header) {
>>  		.bus_type		= DEVICE_BUS_PCI,
>>  		.data			= &vpci->pci_hdr,
>> @@ -560,20 +610,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>  
>>  	r = device__register(&vpci->dev_hdr);
>>  	if (r < 0)
>> -		goto free_msix_mmio;
>> +		return r;
>>  
>>  	/* save the IRQ that device__register() has allocated */
>>  	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;
>>  
>>  	return 0;
>> -
>> -free_msix_mmio:
>> -	kvm__deregister_mmio(kvm, msix_io_block);
>> -free_mmio:
>> -	kvm__deregister_mmio(kvm, mmio_addr);
>> -free_ioport:
>> -	ioport__unregister(kvm, port_addr);
>> -	return r;
>>  }
>>  
>>  int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)

Alexandru Elisei Feb. 7, 2020, 3:39 p.m. UTC | #3
Hi,

On 2/7/20 10:12 AM, Alexandru Elisei wrote:
> Hi,
>
> On 2/6/20 6:21 PM, Andre Przywara wrote:
>> On Thu, 23 Jan 2020 13:48:00 +0000
>> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
>>
>> Hi,
>>
>>> Implement callbacks for activating and deactivating emulation for a BAR
>>> region. This is in preparation for allowing a guest operating system to
>>> enable and disable access to I/O or memory space, or to reassign the
>>> BARs.
>>>
>>> The emulated vesa device has been refactored in the process and the static
>>> variables were removed in order to make using the callbacks less painful.
>>> The framebuffer isn't designed to allow stopping and restarting at
>>> arbitrary points in the guest execution. Furthermore, on x86, the kernel
>>> will not change the BAR addresses, which on bare metal are programmed by
>>> the firmware, so take the easy way out and refuse to deactivate emulation
>>> for the BAR regions.
>>>
>>> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
>>> ---
>>>  hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
>>>  include/kvm/pci.h |  19 +++++++-
>>>  pci.c             |  44 +++++++++++++++++
>>>  vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
>>>  virtio/pci.c      |  90 ++++++++++++++++++++++++----------
>>>  5 files changed, 294 insertions(+), 79 deletions(-)
>>>
>>> diff --git a/hw/vesa.c b/hw/vesa.c
>>> index e988c0425946..74ebebbefa6b 100644
>>> --- a/hw/vesa.c
>>> +++ b/hw/vesa.c
>>> @@ -18,6 +18,12 @@
>>>  #include <inttypes.h>
>>>  #include <unistd.h>
>>>  
>>> +struct vesa_dev {
>>> +	struct pci_device_header	pci_hdr;
>>> +	struct device_header		dev_hdr;
>>> +	struct framebuffer		fb;
>>> +};
>>> +
>>>  static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
>>>  {
>>>  	return true;
>>> @@ -33,29 +39,52 @@ static struct ioport_operations vesa_io_ops = {
>>>  	.io_out			= vesa_pci_io_out,
>>>  };
>>>  
>>> -static struct pci_device_header vesa_pci_device = {
>>> -	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>>> -	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
>>> -	.header_type		= PCI_HEADER_TYPE_NORMAL,
>>> -	.revision_id		= 0,
>>> -	.class[2]		= 0x03,
>>> -	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
>>> -	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
>>> -	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
>>> -	.bar_size[1]		= VESA_MEM_SIZE,
>>> -};
>>> +static int vesa__bar_activate(struct kvm *kvm,
>>> +			      struct pci_device_header *pci_hdr,
>>> +			      int bar_num, void *data)
>>> +{
>>> +	struct vesa_dev *vdev = data;
>>> +	u32 bar_addr, bar_size;
>>> +	char *mem;
>>> +	int r;
>>>  
>>> -static struct device_header vesa_device = {
>>> -	.bus_type	= DEVICE_BUS_PCI,
>>> -	.data		= &vesa_pci_device,
>>> -};
>>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>>> +	bar_size = pci_hdr->bar_size[bar_num];
>>>  
>>> -static struct framebuffer vesafb;
>>> +	switch (bar_num) {
>>> +	case 0:
>>> +		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
>>> +				     NULL);
>>> +		break;
>>> +	case 1:
>>> +		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>>> +		if (mem == MAP_FAILED) {
>>> +			r = -errno;
>>> +			break;
>>> +		}
>>> +		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
>>> +		if (r < 0)
>>> +			break;
>>> +		vdev->fb.mem = mem;
>>> +		break;
>>> +	default:
>>> +		r = -EINVAL;
>>> +	}
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +static int vesa__bar_deactivate(struct kvm *kvm,
>>> +				struct pci_device_header *pci_hdr,
>>> +				int bar_num, void *data)
>>> +{
>>> +	return -EINVAL;
>>> +}
>>>  
>>>  struct framebuffer *vesa__init(struct kvm *kvm)
>>>  {
>>> -	u16 vesa_base_addr;
>>> -	char *mem;
>>> +	struct vesa_dev *vdev;
>>> +	u16 port_addr;
>>>  	int r;
>>>  
>>>  	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
>>> @@ -63,34 +92,51 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>>>  
>>>  	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>>>  		return NULL;
>>> -	r = pci_get_io_port_block(PCI_IO_SIZE);
>>> -	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
>>> -	if (r < 0)
>>> -		return ERR_PTR(r);
>>>  
>>> -	vesa_base_addr			= (u16)r;
>>> -	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
>>> -	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
>>> -	r = device__register(&vesa_device);
>>> -	if (r < 0)
>>> -		return ERR_PTR(r);
>>> +	vdev = calloc(1, sizeof(*vdev));
>>> +	if (vdev == NULL)
>>> +		return ERR_PTR(-ENOMEM);
>> Is it really necessary to allocate this here? You never free this, and I don't see how you could actually do this. AFAICS conceptually there can be only one VESA device? So maybe have a static variable above and use that instead of passing the pointer around? Or use &vdev if you need a pointer argument for the callbacks.
> As far as I can tell, there can be only one VESA device, yes. I was following the
> same pattern from virtio/{net,blk,rng,scsi,9p}.c, which I prefer because it's
> explicit what function can access the device. What's wrong with passing the
> pointer around? The entire PCI emulation code works like that.

Coming back to this, I did some testing on my x86 machine and kvmtool breaks
spectacularly if you specify more than one UI option (more than one of --sdl,
--gtk, --vnc). I'm not sure what the original intent was, but right now specifying
only one option (and having one VESA device) is the only configuration that works.
I'll write a patch to make sure that the user specifies only one option.
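
Something along these lines, probably (sketch only, final form and location in the option handling to be decided):

	if (!!kvm->cfg.vnc + !!kvm->cfg.sdl + !!kvm->cfg.gtk > 1)
		die("only one of --vnc, --sdl or --gtk can be specified");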

I also looked at virtio/console.c and virtio/balloon.c, because only one instance
can be created for a VM. They too declare the device struct as static, and in
console.c the usage is inconsistent: in some callbacks, they use the device
argument, in others they use the static device directly. This is exactly the kind
of thing that I am trying to avoid (for this patch and future patches).

Thanks,
Alex
>
>>>  
>>> -	mem = mmap(NULL, VESA_MEM_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>>> -	if (mem == MAP_FAILED)
>>> -		return ERR_PTR(-errno);
>>> +	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>>>  
>>> -	r = kvm__register_dev_mem(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, mem);
>>> -	if (r < 0)
>>> -		return ERR_PTR(r);
>>> +	vdev->pci_hdr = (struct pci_device_header) {
>>> +		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>>> +		.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
>>> +		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
>>> +		.header_type		= PCI_HEADER_TYPE_NORMAL,
>>> +		.revision_id		= 0,
>>> +		.class[2]		= 0x03,
>>> +		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
>>> +		.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
>>> +		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
>>> +		.bar_size[0]		= PCI_IO_SIZE,
>>> +		.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
>>> +		.bar_size[1]		= VESA_MEM_SIZE,
>>> +	};
>>>  
>>> -	vesafb = (struct framebuffer) {
>>> +	vdev->fb = (struct framebuffer) {
>>>  		.width			= VESA_WIDTH,
>>>  		.height			= VESA_HEIGHT,
>>>  		.depth			= VESA_BPP,
>>> -		.mem			= mem,
>>> +		.mem			= NULL,
>>>  		.mem_addr		= VESA_MEM_ADDR,
>>>  		.mem_size		= VESA_MEM_SIZE,
>>>  		.kvm			= kvm,
>>>  	};
>>> -	return fb__register(&vesafb);
>>> +
>>> +	r = pci__register_bar_regions(kvm, &vdev->pci_hdr, vesa__bar_activate,
>>> +				      vesa__bar_deactivate, vdev);
>>> +	if (r < 0)
>>> +		return ERR_PTR(r);
>>> +
>>> +	vdev->dev_hdr = (struct device_header) {
>>> +		.bus_type       = DEVICE_BUS_PCI,
>>> +		.data           = &vdev->pci_hdr,
>>> +	};
>>> +
>>> +	r = device__register(&vdev->dev_hdr);
>>> +	if (r < 0)
>>> +		return ERR_PTR(r);
>>> +
>>> +	return fb__register(&vdev->fb);
>>>  }
>>> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
>>> index 235cd82fff3c..bf42f497168f 100644
>>> --- a/include/kvm/pci.h
>>> +++ b/include/kvm/pci.h
>>> @@ -89,12 +89,19 @@ struct pci_cap_hdr {
>>>  	u8	next;
>>>  };
>>>  
>>> +struct pci_device_header;
>>> +
>>> +typedef int (*bar_activate_fn_t)(struct kvm *kvm,
>>> +				 struct pci_device_header *pci_hdr,
>>> +				 int bar_num, void *data);
>>> +typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
>>> +				   struct pci_device_header *pci_hdr,
>>> +				   int bar_num, void *data);
>>> +
>>>  #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
>>>  #define PCI_DEV_CFG_SIZE	256
>>>  #define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
>>>  
>>> -struct pci_device_header;
>>> -
>>>  struct pci_config_operations {
>>>  	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>>  		      u8 offset, void *data, int sz);
>>> @@ -136,6 +143,9 @@ struct pci_device_header {
>>>  
>>>  	/* Private to lkvm */
>>>  	u32		bar_size[6];
>>> +	bar_activate_fn_t	bar_activate_fn;
>>> +	bar_deactivate_fn_t	bar_deactivate_fn;
>>> +	void *data;
>>>  	struct pci_config_operations	cfg_ops;
>>>  	/*
>>>  	 * PCI INTx# are level-triggered, but virtual device often feature
>>> @@ -160,8 +170,13 @@ void pci__assign_irq(struct device_header *dev_hdr);
>>>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>>>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size);
>>>  
>>> +
>> Stray empty line?
> Indeed, will get rid of it.
>
> Thanks,
> Alex
>> Cheers,
>> Andre
>>
>>>  void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type);
>>>  
>>> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> +			      bar_activate_fn_t bar_activate_fn,
>>> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data);
>>> +
>>>  static inline bool __pci__memory_space_enabled(u16 command)
>>>  {
>>>  	return command & PCI_COMMAND_MEMORY;
>>> diff --git a/pci.c b/pci.c
>>> index 4f7b863298f6..5412f2defa2e 100644
>>> --- a/pci.c
>>> +++ b/pci.c
>>> @@ -66,6 +66,11 @@ void pci__assign_irq(struct device_header *dev_hdr)
>>>  		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;
>>>  }
>>>  
>>> +static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
>>> +{
>>> +	return  bar_num < 6 && pci_hdr->bar_size[bar_num];
>>> +}
>>> +
>>>  static void *pci_config_address_ptr(u16 port)
>>>  {
>>>  	unsigned long offset;
>>> @@ -264,6 +269,45 @@ struct pci_device_header *pci__find_dev(u8 dev_num)
>>>  	return hdr->data;
>>>  }
>>>  
>>> +int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>> +			      bar_activate_fn_t bar_activate_fn,
>>> +			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
>>> +{
>>> +	int i, r;
>>> +	bool has_bar_regions = false;
>>> +
>>> +	assert(bar_activate_fn && bar_deactivate_fn);
>>> +
>>> +	pci_hdr->bar_activate_fn = bar_activate_fn;
>>> +	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
>>> +	pci_hdr->data = data;
>>> +
>>> +	for (i = 0; i < 6; i++) {
>>> +		if (!pci_bar_is_implemented(pci_hdr, i))
>>> +			continue;
>>> +
>>> +		has_bar_regions = true;
>>> +
>>> +		if (pci__bar_is_io(pci_hdr, i) &&
>>> +		    pci__io_space_enabled(pci_hdr)) {
>>> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
>>> +				if (r < 0)
>>> +					return r;
>>> +			}
>>> +
>>> +		if (pci__bar_is_memory(pci_hdr, i) &&
>>> +		    pci__memory_space_enabled(pci_hdr)) {
>>> +				r = bar_activate_fn(kvm, pci_hdr, i, data);
>>> +				if (r < 0)
>>> +					return r;
>>> +			}
>>> +	}
>>> +
>>> +	assert(has_bar_regions);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  int pci__init(struct kvm *kvm)
>>>  {
>>>  	int r;
>>> diff --git a/vfio/pci.c b/vfio/pci.c
>>> index 8a775a4a4a54..9e595562180b 100644
>>> --- a/vfio/pci.c
>>> +++ b/vfio/pci.c
>>> @@ -446,6 +446,83 @@ out_unlock:
>>>  	mutex_unlock(&pdev->msi.mutex);
>>>  }
>>>  
>>> +static int vfio_pci_bar_activate(struct kvm *kvm,
>>> +				 struct pci_device_header *pci_hdr,
>>> +				 int bar_num, void *data)
>>> +{
>>> +	struct vfio_device *vdev = data;
>>> +	struct vfio_pci_device *pdev = &vdev->pci;
>>> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
>>> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
>>> +	struct vfio_region *region = &vdev->regions[bar_num];
>>> +	int ret;
>>> +
>>> +	if (!region->info.size) {
>>> +		ret = -EINVAL;
>>> +		goto out;
>>> +	}
>>> +
>>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>>> +	    (u32)bar_num == table->bar) {
>>> +		ret = kvm__register_mmio(kvm, table->guest_phys_addr,
>>> +					 table->size, false,
>>> +					 vfio_pci_msix_table_access, pdev);
>>> +		if (ret < 0 || table->bar!= pba->bar)
>>> +			goto out;
>>> +	}
>>> +
>>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>>> +	    (u32)bar_num == pba->bar) {
>>> +		ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
>>> +					 pba->size, false,
>>> +					 vfio_pci_msix_pba_access, pdev);
>>> +		goto out;
>>> +	}
>>> +
>>> +	ret = vfio_map_region(kvm, vdev, region);
>>> +out:
>>> +	return ret;
>>> +}
>>> +
>>> +static int vfio_pci_bar_deactivate(struct kvm *kvm,
>>> +				   struct pci_device_header *pci_hdr,
>>> +				   int bar_num, void *data)
>>> +{
>>> +	struct vfio_device *vdev = data;
>>> +	struct vfio_pci_device *pdev = &vdev->pci;
>>> +	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
>>> +	struct vfio_pci_msix_table *table = &pdev->msix_table;
>>> +	struct vfio_region *region = &vdev->regions[bar_num];
>>> +	int ret;
>>> +	bool success;
>>> +
>>> +	if (!region->info.size) {
>>> +		ret = -EINVAL;
>>> +		goto out;
>>> +	}
>>> +
>>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>>> +	    (u32)bar_num == table->bar) {
>>> +		success = kvm__deregister_mmio(kvm, table->guest_phys_addr);
>>> +		ret = (success ? 0 : -EINVAL);
>>> +		if (ret < 0 || table->bar!= pba->bar)
>>> +			goto out;
>>> +	}
>>> +
>>> +	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
>>> +	    (u32)bar_num == pba->bar) {
>>> +		success = kvm__deregister_mmio(kvm, pba->guest_phys_addr);
>>> +		ret = (success ? 0 : -EINVAL);
>>> +		goto out;
>>> +	}
>>> +
>>> +	vfio_unmap_region(kvm, region);
>>> +	ret = 0;
>>> +
>>> +out:
>>> +	return ret;
>>> +}
>>> +
>>>  static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
>>>  			      u8 offset, void *data, int sz)
>>>  {
>>> @@ -804,12 +881,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>>>  		ret = -ENOMEM;
>>>  		goto out_free;
>>>  	}
>>> -	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>>> -
>>> -	ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
>>> -				 false, vfio_pci_msix_table_access, pdev);
>>> -	if (ret < 0)
>>> -		goto out_free;
>>>  
>>>  	/*
>>>  	 * We could map the physical PBA directly into the guest, but it's
>>> @@ -819,10 +890,7 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
>>>  	 * between MSI-X table and PBA. For the sake of isolation, create a
>>>  	 * virtual PBA.
>>>  	 */
>>> -	ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
>>> -				 vfio_pci_msix_pba_access, pdev);
>>> -	if (ret < 0)
>>> -		goto out_free;
>>> +	pba->guest_phys_addr = table->guest_phys_addr + table->size;
>>>  
>>>  	pdev->msix.entries = entries;
>>>  	pdev->msix.nr_entries = nr_entries;
>>> @@ -893,11 +961,6 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
>>>  		region->guest_phys_addr = pci_get_mmio_block(map_size);
>>>  	}
>>>  
>>> -	/* Map the BARs into the guest or setup a trap region. */
>>> -	ret = vfio_map_region(kvm, vdev, region);
>>> -	if (ret)
>>> -		return ret;
>>> -
>>>  	return 0;
>>>  }
>>>  
>>> @@ -944,7 +1007,12 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
>>>  	}
>>>  
>>>  	/* We've configured the BARs, fake up a Configuration Space */
>>> -	return vfio_pci_fixup_cfg_space(vdev);
>>> +	ret = vfio_pci_fixup_cfg_space(vdev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	return pci__register_bar_regions(kvm, &pdev->hdr, vfio_pci_bar_activate,
>>> +					 vfio_pci_bar_deactivate, vdev);
>>>  }
>>>  
>>>  /*
>>> diff --git a/virtio/pci.c b/virtio/pci.c
>>> index c4822514856c..5a3cc6f1e943 100644
>>> --- a/virtio/pci.c
>>> +++ b/virtio/pci.c
>>> @@ -474,6 +474,65 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
>>>  		virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len);
>>>  }
>>>  
>>> +static int virtio_pci__bar_activate(struct kvm *kvm,
>>> +				    struct pci_device_header *pci_hdr,
>>> +				    int bar_num, void *data)
>>> +{
>>> +	struct virtio_device *vdev = data;
>>> +	u32 bar_addr, bar_size;
>>> +	int r;
>>> +
>>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>>> +	bar_size = pci_hdr->bar_size[bar_num];
>>> +
>>> +	switch (bar_num) {
>>> +	case 0:
>>> +		r = ioport__register(kvm, bar_addr, &virtio_pci__io_ops,
>>> +				     bar_size, vdev);
>>> +		if (r > 0)
>>> +			r = 0;
>>> +		break;
>>> +	case 1:
>>> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
>>> +					virtio_pci__io_mmio_callback, vdev);
>>> +		break;
>>> +	case 2:
>>> +		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
>>> +					virtio_pci__msix_mmio_callback, vdev);
>>> +		break;
>>> +	default:
>>> +		r = -EINVAL;
>>> +	}
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +static int virtio_pci__bar_deactivate(struct kvm *kvm,
>>> +				      struct pci_device_header *pci_hdr,
>>> +				      int bar_num, void *data)
>>> +{
>>> +	u32 bar_addr;
>>> +	bool success;
>>> +	int r;
>>> +
>>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>>> +
>>> +	switch (bar_num) {
>>> +	case 0:
>>> +		r = ioport__unregister(kvm, bar_addr);
>>> +		break;
>>> +	case 1:
>>> +	case 2:
>>> +		success = kvm__deregister_mmio(kvm, bar_addr);
>>> +		r = (success ? 0 : -EINVAL);
>>> +		break;
>>> +	default:
>>> +		r = -EINVAL;
>>> +	}
>>> +
>>> +	return r;
>>> +}
>>> +
>>>  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>>  		     int device_id, int subsys_id, int class)
>>>  {
>>> @@ -488,23 +547,8 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>>  	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE));
>>>  
>>>  	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
>>> -	r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE,
>>> -			     vdev);
>>> -	if (r < 0)
>>> -		return r;
>>> -	port_addr = (u16)r;
>>> -
>>>  	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE);
>>> -	r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false,
>>> -			       virtio_pci__io_mmio_callback, vdev);
>>> -	if (r < 0)
>>> -		goto free_ioport;
>>> -
>>>  	msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2);
>>> -	r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false,
>>> -			       virtio_pci__msix_mmio_callback, vdev);
>>> -	if (r < 0)
>>> -		goto free_mmio;
>>>  
>>>  	vpci->pci_hdr = (struct pci_device_header) {
>>>  		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>>> @@ -530,6 +574,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>>  		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
>>>  	};
>>>  
>>> +	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
>>> +				      virtio_pci__bar_activate,
>>> +				      virtio_pci__bar_deactivate, vdev);
>>> +	if (r < 0)
>>> +		return r;
>>> +
>>>  	vpci->dev_hdr = (struct device_header) {
>>>  		.bus_type		= DEVICE_BUS_PCI,
>>>  		.data			= &vpci->pci_hdr,
>>> @@ -560,20 +610,12 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
>>>  
>>>  	r = device__register(&vpci->dev_hdr);
>>>  	if (r < 0)
>>> -		goto free_msix_mmio;
>>> +		return r;
>>>  
>>>  	/* save the IRQ that device__register() has allocated */
>>>  	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;
>>>  
>>>  	return 0;
>>> -
>>> -free_msix_mmio:
>>> -	kvm__deregister_mmio(kvm, msix_io_block);
>>> -free_mmio:
>>> -	kvm__deregister_mmio(kvm, mmio_addr);
>>> -free_ioport:
>>> -	ioport__unregister(kvm, port_addr);
>>> -	return r;
>>>  }
>>>  
>>>  int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)
Alexandru Elisei March 11, 2020, 4:16 p.m. UTC | #4
Hi,

On 2/7/20 10:12 AM, Alexandru Elisei wrote:
> Hi,
>
> On 2/6/20 6:21 PM, Andre Przywara wrote:
>> On Thu, 23 Jan 2020 13:48:00 +0000
>> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
>>
>> Hi,
>>
>>> Implement callbacks for activating and deactivating emulation for a BAR
>>> region. This is in preparation for allowing a guest operating system to
>>> enable and disable access to I/O or memory space, or to reassign the
>>> BARs.
>>>
>>> The emulated vesa device has been refactored in the process and the static
>>> variables were removed in order to make using the callbacks less painful.
>>> The framebuffer isn't designed to allow stopping and restarting at
>>> arbitrary points in the guest execution. Furthermore, on x86, the kernel
>>> will not change the BAR addresses, which on bare metal are programmed by
>>> the firmware, so take the easy way out and refuse to deactivate emulation
>>> for the BAR regions.
>>>
>>> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
>>> ---
>>>  hw/vesa.c         | 120 ++++++++++++++++++++++++++++++++--------------
>>>  include/kvm/pci.h |  19 +++++++-
>>>  pci.c             |  44 +++++++++++++++++
>>>  vfio/pci.c        | 100 +++++++++++++++++++++++++++++++-------
>>>  virtio/pci.c      |  90 ++++++++++++++++++++++++----------
>>>  5 files changed, 294 insertions(+), 79 deletions(-)
>>>
>>> diff --git a/hw/vesa.c b/hw/vesa.c
>>> index e988c0425946..74ebebbefa6b 100644
>>> --- a/hw/vesa.c
>>> +++ b/hw/vesa.c
>>> @@ -18,6 +18,12 @@
>>>  #include <inttypes.h>
>>>  #include <unistd.h>
>>>  
>>> +struct vesa_dev {
>>> +	struct pci_device_header	pci_hdr;
>>> +	struct device_header		dev_hdr;
>>> +	struct framebuffer		fb;
>>> +};
>>> +
>>>  static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
>>>  {
>>>  	return true;
>>> @@ -33,29 +39,52 @@ static struct ioport_operations vesa_io_ops = {
>>>  	.io_out			= vesa_pci_io_out,
>>>  };
>>>  
>>> -static struct pci_device_header vesa_pci_device = {
>>> -	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
>>> -	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
>>> -	.header_type		= PCI_HEADER_TYPE_NORMAL,
>>> -	.revision_id		= 0,
>>> -	.class[2]		= 0x03,
>>> -	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
>>> -	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
>>> -	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
>>> -	.bar_size[1]		= VESA_MEM_SIZE,
>>> -};
>>> +static int vesa__bar_activate(struct kvm *kvm,
>>> +			      struct pci_device_header *pci_hdr,
>>> +			      int bar_num, void *data)
>>> +{
>>> +	struct vesa_dev *vdev = data;
>>> +	u32 bar_addr, bar_size;
>>> +	char *mem;
>>> +	int r;
>>>  
>>> -static struct device_header vesa_device = {
>>> -	.bus_type	= DEVICE_BUS_PCI,
>>> -	.data		= &vesa_pci_device,
>>> -};
>>> +	bar_addr = pci__bar_address(pci_hdr, bar_num);
>>> +	bar_size = pci_hdr->bar_size[bar_num];
>>>  
>>> -static struct framebuffer vesafb;
>>> +	switch (bar_num) {
>>> +	case 0:
>>> +		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
>>> +				     NULL);
>>> +		break;
>>> +	case 1:
>>> +		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
>>> +		if (mem == MAP_FAILED) {
>>> +			r = -errno;
>>> +			break;
>>> +		}
>>> +		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
>>> +		if (r < 0)
>>> +			break;
>>> +		vdev->fb.mem = mem;
>>> +		break;
>>> +	default:
>>> +		r = -EINVAL;
>>> +	}
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +static int vesa__bar_deactivate(struct kvm *kvm,
>>> +				struct pci_device_header *pci_hdr,
>>> +				int bar_num, void *data)
>>> +{
>>> +	return -EINVAL;
>>> +}
>>>  
>>>  struct framebuffer *vesa__init(struct kvm *kvm)
>>>  {
>>> -	u16 vesa_base_addr;
>>> -	char *mem;
>>> +	struct vesa_dev *vdev;
>>> +	u16 port_addr;
>>>  	int r;
>>>  
>>>  	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
>>> @@ -63,34 +92,51 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>>>  
>>>  	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>>>  		return NULL;
>>> -	r = pci_get_io_port_block(PCI_IO_SIZE);
>>> -	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
>>> -	if (r < 0)
>>> -		return ERR_PTR(r);
>>>  
>>> -	vesa_base_addr			= (u16)r;
>>> -	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
>>> -	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
>>> -	r = device__register(&vesa_device);
>>> -	if (r < 0)
>>> -		return ERR_PTR(r);
>>> +	vdev = calloc(1, sizeof(*vdev));
>>> +	if (vdev == NULL)
>>> +		return ERR_PTR(-ENOMEM);
>> Is it really necessary to allocate this here? You never free it, and I don't
>> see how you could. AFAICS there can conceptually be only one VESA device, so
>> maybe have a static variable above and use that instead of passing the pointer
>> around? Or use its address if you need a pointer argument for the callbacks.
> As far as I can tell, there can be only one VESA device, yes. I was following the
> same pattern as virtio/{net,blk,rng,scsi,9p}.c, which I prefer because it makes
> explicit which functions can access the device. What's wrong with passing the
> pointer around? The entire PCI emulation code works like that.
>
I took a closer look at this patch, and it turns out that vesa__bar_activate is
called exactly once, at initialization, because vesa__bar_deactivate doesn't
actually deactivate emulation (it returns an error to let the PCI emulation code
know that). I will drop the changes to hw/vesa.c, keep the vesa device static and
leave the framebuffer creation in vesa__init.
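
For illustration only, a rough sketch of what the static variant could look like
(hypothetical, not an actual follow-up patch; it assumes the
pci__register_bar_regions()/pci__bar_address() helpers introduced by this series,
the existing vesa_io_ops, and that vesa__init() still mmap()s the framebuffer):

/* One framebuffer instance, file scope, never freed. */
static struct framebuffer vesafb;

static int vesa__bar_activate(struct kvm *kvm,
			      struct pci_device_header *pci_hdr,
			      int bar_num, void *data)
{
	u32 bar_addr = pci__bar_address(pci_hdr, bar_num);
	u32 bar_size = pci_hdr->bar_size[bar_num];
	int r;

	switch (bar_num) {
	case 0:
		/* I/O BAR: trap accesses to the (dummy) I/O ports. */
		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size, NULL);
		return r < 0 ? r : 0;
	case 1:
		/* Memory BAR: expose the framebuffer mmap()ed in vesa__init(). */
		return kvm__register_dev_mem(kvm, bar_addr, bar_size, vesafb.mem);
	default:
		return -EINVAL;
	}
}

static int vesa__bar_deactivate(struct kvm *kvm,
				struct pci_device_header *pci_hdr,
				int bar_num, void *data)
{
	/* The framebuffer cannot be stopped and restarted, so refuse. */
	return -EINVAL;
}

	/* In vesa__init(), after the framebuffer has been mmap()ed: */
	r = pci__register_bar_regions(kvm, &vesa_pci_device,
				      vesa__bar_activate,
				      vesa__bar_deactivate, NULL);
	if (r < 0)
		return ERR_PTR(r);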

Thanks,
Alex
diff mbox series

Patch

diff --git a/hw/vesa.c b/hw/vesa.c
index e988c0425946..74ebebbefa6b 100644
--- a/hw/vesa.c
+++ b/hw/vesa.c
@@ -18,6 +18,12 @@ 
 #include <inttypes.h>
 #include <unistd.h>
 
+struct vesa_dev {
+	struct pci_device_header	pci_hdr;
+	struct device_header		dev_hdr;
+	struct framebuffer		fb;
+};
+
 static bool vesa_pci_io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
 {
 	return true;
@@ -33,29 +39,52 @@  static struct ioport_operations vesa_io_ops = {
 	.io_out			= vesa_pci_io_out,
 };
 
-static struct pci_device_header vesa_pci_device = {
-	.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
-	.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
-	.header_type		= PCI_HEADER_TYPE_NORMAL,
-	.revision_id		= 0,
-	.class[2]		= 0x03,
-	.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
-	.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
-	.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
-	.bar_size[1]		= VESA_MEM_SIZE,
-};
+static int vesa__bar_activate(struct kvm *kvm,
+			      struct pci_device_header *pci_hdr,
+			      int bar_num, void *data)
+{
+	struct vesa_dev *vdev = data;
+	u32 bar_addr, bar_size;
+	char *mem;
+	int r;
 
-static struct device_header vesa_device = {
-	.bus_type	= DEVICE_BUS_PCI,
-	.data		= &vesa_pci_device,
-};
+	bar_addr = pci__bar_address(pci_hdr, bar_num);
+	bar_size = pci_hdr->bar_size[bar_num];
 
-static struct framebuffer vesafb;
+	switch (bar_num) {
+	case 0:
+		r = ioport__register(kvm, bar_addr, &vesa_io_ops, bar_size,
+				     NULL);
+		break;
+	case 1:
+		mem = mmap(NULL, bar_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+		if (mem == MAP_FAILED) {
+			r = -errno;
+			break;
+		}
+		r = kvm__register_dev_mem(kvm, bar_addr, bar_size, mem);
+		if (r < 0)
+			break;
+		vdev->fb.mem = mem;
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static int vesa__bar_deactivate(struct kvm *kvm,
+				struct pci_device_header *pci_hdr,
+				int bar_num, void *data)
+{
+	return -EINVAL;
+}
 
 struct framebuffer *vesa__init(struct kvm *kvm)
 {
-	u16 vesa_base_addr;
-	char *mem;
+	struct vesa_dev *vdev;
+	u16 port_addr;
 	int r;
 
 	BUILD_BUG_ON(!is_power_of_two(VESA_MEM_SIZE));
@@ -63,34 +92,51 @@  struct framebuffer *vesa__init(struct kvm *kvm)
 
 	if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
 		return NULL;
-	r = pci_get_io_port_block(PCI_IO_SIZE);
-	r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
-	if (r < 0)
-		return ERR_PTR(r);
 
-	vesa_base_addr			= (u16)r;
-	vesa_pci_device.bar[0]		= cpu_to_le32(vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO);
-	vesa_pci_device.bar_size[0]	= PCI_IO_SIZE;
-	r = device__register(&vesa_device);
-	if (r < 0)
-		return ERR_PTR(r);
+	vdev = calloc(1, sizeof(*vdev));
+	if (vdev == NULL)
+		return ERR_PTR(-ENOMEM);
 
-	mem = mmap(NULL, VESA_MEM_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
-	if (mem == MAP_FAILED)
-		return ERR_PTR(-errno);
+	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
 
-	r = kvm__register_dev_mem(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, mem);
-	if (r < 0)
-		return ERR_PTR(r);
+	vdev->pci_hdr = (struct pci_device_header) {
+		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
+		.device_id		= cpu_to_le16(PCI_DEVICE_ID_VESA),
+		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
+		.header_type		= PCI_HEADER_TYPE_NORMAL,
+		.revision_id		= 0,
+		.class[2]		= 0x03,
+		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
+		.subsys_id		= cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
+		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
+		.bar_size[0]		= PCI_IO_SIZE,
+		.bar[1]			= cpu_to_le32(VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY),
+		.bar_size[1]		= VESA_MEM_SIZE,
+	};
 
-	vesafb = (struct framebuffer) {
+	vdev->fb = (struct framebuffer) {
 		.width			= VESA_WIDTH,
 		.height			= VESA_HEIGHT,
 		.depth			= VESA_BPP,
-		.mem			= mem,
+		.mem			= NULL,
 		.mem_addr		= VESA_MEM_ADDR,
 		.mem_size		= VESA_MEM_SIZE,
 		.kvm			= kvm,
 	};
-	return fb__register(&vesafb);
+
+	r = pci__register_bar_regions(kvm, &vdev->pci_hdr, vesa__bar_activate,
+				      vesa__bar_deactivate, vdev);
+	if (r < 0)
+		return ERR_PTR(r);
+
+	vdev->dev_hdr = (struct device_header) {
+		.bus_type       = DEVICE_BUS_PCI,
+		.data           = &vdev->pci_hdr,
+	};
+
+	r = device__register(&vdev->dev_hdr);
+	if (r < 0)
+		return ERR_PTR(r);
+
+	return fb__register(&vdev->fb);
 }
diff --git a/include/kvm/pci.h b/include/kvm/pci.h
index 235cd82fff3c..bf42f497168f 100644
--- a/include/kvm/pci.h
+++ b/include/kvm/pci.h
@@ -89,12 +89,19 @@  struct pci_cap_hdr {
 	u8	next;
 };
 
+struct pci_device_header;
+
+typedef int (*bar_activate_fn_t)(struct kvm *kvm,
+				 struct pci_device_header *pci_hdr,
+				 int bar_num, void *data);
+typedef int (*bar_deactivate_fn_t)(struct kvm *kvm,
+				   struct pci_device_header *pci_hdr,
+				   int bar_num, void *data);
+
 #define PCI_BAR_OFFSET(b)	(offsetof(struct pci_device_header, bar[b]))
 #define PCI_DEV_CFG_SIZE	256
 #define PCI_DEV_CFG_MASK	(PCI_DEV_CFG_SIZE - 1)
 
-struct pci_device_header;
-
 struct pci_config_operations {
 	void (*write)(struct kvm *kvm, struct pci_device_header *pci_hdr,
 		      u8 offset, void *data, int sz);
@@ -136,6 +143,9 @@  struct pci_device_header {
 
 	/* Private to lkvm */
 	u32		bar_size[6];
+	bar_activate_fn_t	bar_activate_fn;
+	bar_deactivate_fn_t	bar_deactivate_fn;
+	void *data;
 	struct pci_config_operations	cfg_ops;
 	/*
 	 * PCI INTx# are level-triggered, but virtual device often feature
@@ -160,8 +170,13 @@  void pci__assign_irq(struct device_header *dev_hdr);
 void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size);
 void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size);
 
+
 void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type);
 
+int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
+			      bar_activate_fn_t bar_activate_fn,
+			      bar_deactivate_fn_t bar_deactivate_fn, void *data);
+
 static inline bool __pci__memory_space_enabled(u16 command)
 {
 	return command & PCI_COMMAND_MEMORY;
diff --git a/pci.c b/pci.c
index 4f7b863298f6..5412f2defa2e 100644
--- a/pci.c
+++ b/pci.c
@@ -66,6 +66,11 @@  void pci__assign_irq(struct device_header *dev_hdr)
 		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;
 }
 
+static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
+{
+	return  bar_num < 6 && pci_hdr->bar_size[bar_num];
+}
+
 static void *pci_config_address_ptr(u16 port)
 {
 	unsigned long offset;
@@ -264,6 +269,45 @@  struct pci_device_header *pci__find_dev(u8 dev_num)
 	return hdr->data;
 }
 
+int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
+			      bar_activate_fn_t bar_activate_fn,
+			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
+{
+	int i, r;
+	bool has_bar_regions = false;
+
+	assert(bar_activate_fn && bar_deactivate_fn);
+
+	pci_hdr->bar_activate_fn = bar_activate_fn;
+	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
+	pci_hdr->data = data;
+
+	for (i = 0; i < 6; i++) {
+		if (!pci_bar_is_implemented(pci_hdr, i))
+			continue;
+
+		has_bar_regions = true;
+
+		if (pci__bar_is_io(pci_hdr, i) &&
+		    pci__io_space_enabled(pci_hdr)) {
+				r = bar_activate_fn(kvm, pci_hdr, i, data);
+				if (r < 0)
+					return r;
+			}
+
+		if (pci__bar_is_memory(pci_hdr, i) &&
+		    pci__memory_space_enabled(pci_hdr)) {
+				r = bar_activate_fn(kvm, pci_hdr, i, data);
+				if (r < 0)
+					return r;
+			}
+	}
+
+	assert(has_bar_regions);
+
+	return 0;
+}
+
 int pci__init(struct kvm *kvm)
 {
 	int r;
diff --git a/vfio/pci.c b/vfio/pci.c
index 8a775a4a4a54..9e595562180b 100644
--- a/vfio/pci.c
+++ b/vfio/pci.c
@@ -446,6 +446,83 @@  out_unlock:
 	mutex_unlock(&pdev->msi.mutex);
 }
 
+static int vfio_pci_bar_activate(struct kvm *kvm,
+				 struct pci_device_header *pci_hdr,
+				 int bar_num, void *data)
+{
+	struct vfio_device *vdev = data;
+	struct vfio_pci_device *pdev = &vdev->pci;
+	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
+	struct vfio_pci_msix_table *table = &pdev->msix_table;
+	struct vfio_region *region = &vdev->regions[bar_num];
+	int ret;
+
+	if (!region->info.size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
+	    (u32)bar_num == table->bar) {
+		ret = kvm__register_mmio(kvm, table->guest_phys_addr,
+					 table->size, false,
+					 vfio_pci_msix_table_access, pdev);
+		if (ret < 0 || table->bar!= pba->bar)
+			goto out;
+	}
+
+	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
+	    (u32)bar_num == pba->bar) {
+		ret = kvm__register_mmio(kvm, pba->guest_phys_addr,
+					 pba->size, false,
+					 vfio_pci_msix_pba_access, pdev);
+		goto out;
+	}
+
+	ret = vfio_map_region(kvm, vdev, region);
+out:
+	return ret;
+}
+
+static int vfio_pci_bar_deactivate(struct kvm *kvm,
+				   struct pci_device_header *pci_hdr,
+				   int bar_num, void *data)
+{
+	struct vfio_device *vdev = data;
+	struct vfio_pci_device *pdev = &vdev->pci;
+	struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
+	struct vfio_pci_msix_table *table = &pdev->msix_table;
+	struct vfio_region *region = &vdev->regions[bar_num];
+	int ret;
+	bool success;
+
+	if (!region->info.size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
+	    (u32)bar_num == table->bar) {
+		success = kvm__deregister_mmio(kvm, table->guest_phys_addr);
+		ret = (success ? 0 : -EINVAL);
+		if (ret < 0 || table->bar!= pba->bar)
+			goto out;
+	}
+
+	if ((pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) &&
+	    (u32)bar_num == pba->bar) {
+		success = kvm__deregister_mmio(kvm, pba->guest_phys_addr);
+		ret = (success ? 0 : -EINVAL);
+		goto out;
+	}
+
+	vfio_unmap_region(kvm, region);
+	ret = 0;
+
+out:
+	return ret;
+}
+
 static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
 			      u8 offset, void *data, int sz)
 {
@@ -804,12 +881,6 @@  static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
 		ret = -ENOMEM;
 		goto out_free;
 	}
-	pba->guest_phys_addr = table->guest_phys_addr + table->size;
-
-	ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
-				 false, vfio_pci_msix_table_access, pdev);
-	if (ret < 0)
-		goto out_free;
 
 	/*
 	 * We could map the physical PBA directly into the guest, but it's
@@ -819,10 +890,7 @@  static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev)
 	 * between MSI-X table and PBA. For the sake of isolation, create a
 	 * virtual PBA.
 	 */
-	ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
-				 vfio_pci_msix_pba_access, pdev);
-	if (ret < 0)
-		goto out_free;
+	pba->guest_phys_addr = table->guest_phys_addr + table->size;
 
 	pdev->msix.entries = entries;
 	pdev->msix.nr_entries = nr_entries;
@@ -893,11 +961,6 @@  static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
 		region->guest_phys_addr = pci_get_mmio_block(map_size);
 	}
 
-	/* Map the BARs into the guest or setup a trap region. */
-	ret = vfio_map_region(kvm, vdev, region);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
@@ -944,7 +1007,12 @@  static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 	}
 
 	/* We've configured the BARs, fake up a Configuration Space */
-	return vfio_pci_fixup_cfg_space(vdev);
+	ret = vfio_pci_fixup_cfg_space(vdev);
+	if (ret)
+		return ret;
+
+	return pci__register_bar_regions(kvm, &pdev->hdr, vfio_pci_bar_activate,
+					 vfio_pci_bar_deactivate, vdev);
 }
 
 /*
diff --git a/virtio/pci.c b/virtio/pci.c
index c4822514856c..5a3cc6f1e943 100644
--- a/virtio/pci.c
+++ b/virtio/pci.c
@@ -474,6 +474,65 @@  static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
 		virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len);
 }
 
+static int virtio_pci__bar_activate(struct kvm *kvm,
+				    struct pci_device_header *pci_hdr,
+				    int bar_num, void *data)
+{
+	struct virtio_device *vdev = data;
+	u32 bar_addr, bar_size;
+	int r;
+
+	bar_addr = pci__bar_address(pci_hdr, bar_num);
+	bar_size = pci_hdr->bar_size[bar_num];
+
+	switch (bar_num) {
+	case 0:
+		r = ioport__register(kvm, bar_addr, &virtio_pci__io_ops,
+				     bar_size, vdev);
+		if (r > 0)
+			r = 0;
+		break;
+	case 1:
+		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
+					virtio_pci__io_mmio_callback, vdev);
+		break;
+	case 2:
+		r =  kvm__register_mmio(kvm, bar_addr, bar_size, false,
+					virtio_pci__msix_mmio_callback, vdev);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static int virtio_pci__bar_deactivate(struct kvm *kvm,
+				      struct pci_device_header *pci_hdr,
+				      int bar_num, void *data)
+{
+	u32 bar_addr;
+	bool success;
+	int r;
+
+	bar_addr = pci__bar_address(pci_hdr, bar_num);
+
+	switch (bar_num) {
+	case 0:
+		r = ioport__unregister(kvm, bar_addr);
+		break;
+	case 1:
+	case 2:
+		success = kvm__deregister_mmio(kvm, bar_addr);
+		r = (success ? 0 : -EINVAL);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
 		     int device_id, int subsys_id, int class)
 {
@@ -488,23 +547,8 @@  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
 	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE));
 
 	port_addr = pci_get_io_port_block(PCI_IO_SIZE);
-	r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE,
-			     vdev);
-	if (r < 0)
-		return r;
-	port_addr = (u16)r;
-
 	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE);
-	r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false,
-			       virtio_pci__io_mmio_callback, vdev);
-	if (r < 0)
-		goto free_ioport;
-
 	msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2);
-	r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false,
-			       virtio_pci__msix_mmio_callback, vdev);
-	if (r < 0)
-		goto free_mmio;
 
 	vpci->pci_hdr = (struct pci_device_header) {
 		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
@@ -530,6 +574,12 @@  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
 		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
 	};
 
+	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
+				      virtio_pci__bar_activate,
+				      virtio_pci__bar_deactivate, vdev);
+	if (r < 0)
+		return r;
+
 	vpci->dev_hdr = (struct device_header) {
 		.bus_type		= DEVICE_BUS_PCI,
 		.data			= &vpci->pci_hdr,
@@ -560,20 +610,12 @@  int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
 
 	r = device__register(&vpci->dev_hdr);
 	if (r < 0)
-		goto free_msix_mmio;
+		return r;
 
 	/* save the IRQ that device__register() has allocated */
 	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;
 
 	return 0;
-
-free_msix_mmio:
-	kvm__deregister_mmio(kvm, msix_io_block);
-free_mmio:
-	kvm__deregister_mmio(kvm, mmio_addr);
-free_ioport:
-	ioport__unregister(kvm, port_addr);
-	return r;
 }
 
 int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)