Message ID | 20230720003205.1828537-6-volodymyr_babchuk@epam.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On Thu, Jul 20, 2023 at 12:32:32AM +0000, Volodymyr Babchuk wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Add relevant vpci register handlers when assigning PCI device to a domain > and remove those when de-assigning. This allows having different > handlers for different domains, e.g. hwdom and other guests. > > Emulate guest BAR register values: this allows creating a guest view > of the registers and emulates size and properties probe as it is done > during PCI device enumeration by the guest. > > All empty, IO and ROM BARs for guests are emulated by returning 0 on > reads and ignoring writes: this BARs are special with this respect as > their lower bits have special meaning, so returning default ~0 on read > may confuse guest OS. > > Memory decoding is initially disabled when used by guests in order to > prevent the BAR being placed on top of a RAM region. I'm kind of lost on this last sentence, as I don't see the patch explicitly disabling PCI_COMMAND_MEMORY form the command register. Is that more of an expectation on the initial device state? Maybe there should be some checking in that case then? > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > --- > > Since v6: > - unify the writing of the PCI_COMMAND register on the > error path into a label > - do not introduce bar_ignore_access helper and open code > - s/guest_bar_ignore_read/empty_bar_read > - update error message in guest_bar_write > - only setup empty_bar_read for IO if !x86 > Since v5: > - make sure that the guest set address has the same page offset > as the physical address on the host > - remove guest_rom_{read|write} as those just implement the default > behaviour of the registers not being handled > - adjusted comment for struct vpci.addr field > - add guest handlers for BARs which are not handled and will otherwise > return ~0 on read and ignore writes. The BARs are special with this > respect as their lower bits have special meaning, so returning ~0 > doesn't seem to be right > Since v4: > - updated commit message > - s/guest_addr/guest_reg > Since v3: > - squashed two patches: dynamic add/remove handlers and guest BAR > handler implementation > - fix guest BAR read of the high part of a 64bit BAR (Roger) > - add error handling to vpci_assign_device > - s/dom%pd/%pd > - blank line before return > Since v2: > - remove unneeded ifdefs for CONFIG_HAS_VPCI_GUEST_SUPPORT as more code > has been eliminated from being built on x86 > Since v1: > - constify struct pci_dev where possible > - do not open code is_system_domain() > - simplify some code3. simplify > - use gdprintk + error code instead of gprintk > - gate vpci_bar_{add|remove}_handlers with CONFIG_HAS_VPCI_GUEST_SUPPORT, > so these do not get compiled for x86 > - removed unneeded is_system_domain check > - re-work guest read/write to be much simpler and do more work on write > than read which is expected to be called more frequently > - removed one too obvious comment > --- > xen/drivers/vpci/header.c | 156 +++++++++++++++++++++++++++++++------- > xen/include/xen/vpci.h | 3 + > 2 files changed, 130 insertions(+), 29 deletions(-) > > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c > index 2780fcae72..5dc9b5338b 100644 > --- a/xen/drivers/vpci/header.c > +++ b/xen/drivers/vpci/header.c > @@ -457,6 +457,71 @@ static void cf_check bar_write( > pci_conf_write32(pdev->sbdf, reg, val); > } > > +static void cf_check guest_bar_write(const struct pci_dev *pdev, > + unsigned int reg, uint32_t val, void *data) > +{ > + struct vpci_bar *bar = data; > + bool hi = false; > + uint64_t guest_reg = bar->guest_reg; > + > + if ( bar->type == VPCI_BAR_MEM64_HI ) > + { > + ASSERT(reg > PCI_BASE_ADDRESS_0); > + bar--; > + hi = true; > + } > + else > + { > + val &= PCI_BASE_ADDRESS_MEM_MASK; > + val |= bar->type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32 > + : PCI_BASE_ADDRESS_MEM_TYPE_64; > + val |= bar->prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0; > + } > + > + guest_reg &= ~(0xffffffffull << (hi ? 32 : 0)); > + guest_reg |= (uint64_t)val << (hi ? 32 : 0); > + > + guest_reg &= ~(bar->size - 1) | ~PCI_BASE_ADDRESS_MEM_MASK; > + > + /* > + * Make sure that the guest set address has the same page offset > + * as the physical address on the host or otherwise things won't work as > + * expected. > + */ > + if ( (guest_reg & (~PAGE_MASK & PCI_BASE_ADDRESS_MEM_MASK)) != > + (bar->addr & ~PAGE_MASK) ) > + { > + gprintk(XENLOG_WARNING, > + "%pp: ignored BAR %zu write attempting to change page offset\n", > + &pdev->sbdf, bar - pdev->vpci->header.bars + hi); > + return; > + } > + > + bar->guest_reg = guest_reg; > +} > + > +static uint32_t cf_check guest_bar_read(const struct pci_dev *pdev, > + unsigned int reg, void *data) > +{ > + const struct vpci_bar *bar = data; > + bool hi = false; > + > + if ( bar->type == VPCI_BAR_MEM64_HI ) > + { > + ASSERT(reg > PCI_BASE_ADDRESS_0); > + bar--; > + hi = true; > + } > + > + return bar->guest_reg >> (hi ? 32 : 0); > +} > + > +static uint32_t cf_check empty_bar_read(const struct pci_dev *pdev, > + unsigned int reg, void *data) > +{ > + return 0; > +} > + > static void cf_check rom_write( > const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data) > { > @@ -517,6 +582,7 @@ static int cf_check init_bars(struct pci_dev *pdev) > struct vpci_header *header = &pdev->vpci->header; > struct vpci_bar *bars = header->bars; > int rc; > + bool is_hwdom = is_hardware_domain(pdev->domain); > > ASSERT(rw_is_locked(&pdev->domain->pci_lock)); > > @@ -558,13 +624,12 @@ static int cf_check init_bars(struct pci_dev *pdev) > if ( i && bars[i - 1].type == VPCI_BAR_MEM64_LO ) > { > bars[i].type = VPCI_BAR_MEM64_HI; > - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, > - 4, &bars[i]); > + rc = vpci_add_register(pdev->vpci, > + is_hwdom ? vpci_hw_read32 : guest_bar_read, > + is_hwdom ? bar_write : guest_bar_write, > + reg, 4, &bars[i]); > if ( rc ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > > continue; > } > @@ -573,6 +638,17 @@ static int cf_check init_bars(struct pci_dev *pdev) > if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) > { > bars[i].type = VPCI_BAR_IO; > + > +#ifndef CONFIG_X86 > + if ( !is_hwdom ) > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + reg, 4, &bars[i]); For an empty BAR there's no need to pass &bars[i] around? (same for all callers that setup empty_bar_read() handlers. > + if ( rc ) > + goto fail; > + } > +#endif This might be better done as an IS_ENABLED() check in the introduced if condition. Need a bit of a description as to why IO space BARs are handled as empty BARs for domUs. > + > continue; > } > if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > @@ -584,14 +660,20 @@ static int cf_check init_bars(struct pci_dev *pdev) > rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size, > (i == num_bars - 1) ? PCI_BAR_LAST : 0); > if ( rc < 0 ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > > if ( size == 0 ) > { > bars[i].type = VPCI_BAR_EMPTY; > + > + if ( !is_hwdom ) > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + reg, 4, &bars[i]); > + if ( rc ) > + goto fail; > + } > + > continue; > } > > @@ -599,34 +681,50 @@ static int cf_check init_bars(struct pci_dev *pdev) > bars[i].size = size; > bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; > > - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, > - &bars[i]); > + rc = vpci_add_register(pdev->vpci, > + is_hwdom ? vpci_hw_read32 : guest_bar_read, > + is_hwdom ? bar_write : guest_bar_write, > + reg, 4, &bars[i]); > if ( rc ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > } > > - /* Check expansion ROM. */ > - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); > - if ( rc > 0 && size ) > + /* Check expansion ROM: we do not handle ROM for guests. */ Is there any specific reason for not handling ROM BAR for guests? > + if ( is_hwdom ) > { > - struct vpci_bar *rom = &header->bars[num_bars]; > + rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); > + if ( rc > 0 && size ) > + { > + struct vpci_bar *rom = &header->bars[num_bars]; > > - rom->type = VPCI_BAR_ROM; > - rom->size = size; > - rom->addr = addr; > - header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & > - PCI_ROM_ADDRESS_ENABLE; > + rom->type = VPCI_BAR_ROM; > + rom->size = size; > + rom->addr = addr; > + header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & > + PCI_ROM_ADDRESS_ENABLE; > > - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, rom_reg, > - 4, rom); > - if ( rc ) > - rom->type = VPCI_BAR_EMPTY; > + rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, > + rom_reg, 4, rom); > + if ( rc ) > + rom->type = VPCI_BAR_EMPTY; > + } > + } > + else > + { > + if ( !is_hwdom ) Extra !is_hwdown? The condition on the outer if is already is_hwdom, and this is the else branch. > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + rom_reg, 4, &header->bars[num_bars]); > + if ( rc ) > + goto fail; > + } > } > > return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, cmd, false) : 0; > + > + fail: > + pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > + return rc; It might have been better for the usage of the fail label to be introduced in a pre-patch, as there would then be less changes here (and the pre-patch would be a non-functional change). Thanks, Roger.
Hi Volodymyr, > On 20 Jul 2023, at 1:32 am, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com> wrote: > > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Add relevant vpci register handlers when assigning PCI device to a domain > and remove those when de-assigning. This allows having different > handlers for different domains, e.g. hwdom and other guests. > > Emulate guest BAR register values: this allows creating a guest view > of the registers and emulates size and properties probe as it is done > during PCI device enumeration by the guest. > > All empty, IO and ROM BARs for guests are emulated by returning 0 on > reads and ignoring writes: this BARs are special with this respect as > their lower bits have special meaning, so returning default ~0 on read > may confuse guest OS. > > Memory decoding is initially disabled when used by guests in order to > prevent the BAR being placed on top of a RAM region. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > --- > > Since v6: > - unify the writing of the PCI_COMMAND register on the > error path into a label > - do not introduce bar_ignore_access helper and open code > - s/guest_bar_ignore_read/empty_bar_read > - update error message in guest_bar_write > - only setup empty_bar_read for IO if !x86 > Since v5: > - make sure that the guest set address has the same page offset > as the physical address on the host > - remove guest_rom_{read|write} as those just implement the default > behaviour of the registers not being handled > - adjusted comment for struct vpci.addr field > - add guest handlers for BARs which are not handled and will otherwise > return ~0 on read and ignore writes. The BARs are special with this > respect as their lower bits have special meaning, so returning ~0 > doesn't seem to be right > Since v4: > - updated commit message > - s/guest_addr/guest_reg > Since v3: > - squashed two patches: dynamic add/remove handlers and guest BAR > handler implementation > - fix guest BAR read of the high part of a 64bit BAR (Roger) > - add error handling to vpci_assign_device > - s/dom%pd/%pd > - blank line before return > Since v2: > - remove unneeded ifdefs for CONFIG_HAS_VPCI_GUEST_SUPPORT as more code > has been eliminated from being built on x86 > Since v1: > - constify struct pci_dev where possible > - do not open code is_system_domain() > - simplify some code3. simplify > - use gdprintk + error code instead of gprintk > - gate vpci_bar_{add|remove}_handlers with CONFIG_HAS_VPCI_GUEST_SUPPORT, > so these do not get compiled for x86 > - removed unneeded is_system_domain check > - re-work guest read/write to be much simpler and do more work on write > than read which is expected to be called more frequently > - removed one too obvious comment > --- > xen/drivers/vpci/header.c | 156 +++++++++++++++++++++++++++++++------- > xen/include/xen/vpci.h | 3 + > 2 files changed, 130 insertions(+), 29 deletions(-) > > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c > index 2780fcae72..5dc9b5338b 100644 > --- a/xen/drivers/vpci/header.c > +++ b/xen/drivers/vpci/header.c > @@ -457,6 +457,71 @@ static void cf_check bar_write( > pci_conf_write32(pdev->sbdf, reg, val); > } > > +static void cf_check guest_bar_write(const struct pci_dev *pdev, > + unsigned int reg, uint32_t val, void *data) > +{ > + struct vpci_bar *bar = data; > + bool hi = false; > + uint64_t guest_reg = bar->guest_reg; > + > + if ( bar->type == VPCI_BAR_MEM64_HI ) > + { > + ASSERT(reg > PCI_BASE_ADDRESS_0); > + bar--; > + hi = true; > + } > + else > + { > + val &= PCI_BASE_ADDRESS_MEM_MASK; > + val |= bar->type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32 > + : PCI_BASE_ADDRESS_MEM_TYPE_64; > + val |= bar->prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0; > + } > + > + guest_reg &= ~(0xffffffffull << (hi ? 32 : 0)); > + guest_reg |= (uint64_t)val << (hi ? 32 : 0); > + > + guest_reg &= ~(bar->size - 1) | ~PCI_BASE_ADDRESS_MEM_MASK; > + > + /* > + * Make sure that the guest set address has the same page offset > + * as the physical address on the host or otherwise things won't work as > + * expected. > + */ > + if ( (guest_reg & (~PAGE_MASK & PCI_BASE_ADDRESS_MEM_MASK)) != > + (bar->addr & ~PAGE_MASK) ) > + { > + gprintk(XENLOG_WARNING, > + "%pp: ignored BAR %zu write attempting to change page offset\n", > + &pdev->sbdf, bar - pdev->vpci->header.bars + hi); > + return; > + } > + > + bar->guest_reg = guest_reg; > +} > + > +static uint32_t cf_check guest_bar_read(const struct pci_dev *pdev, > + unsigned int reg, void *data) > +{ > + const struct vpci_bar *bar = data; > + bool hi = false; > + > + if ( bar->type == VPCI_BAR_MEM64_HI ) > + { > + ASSERT(reg > PCI_BASE_ADDRESS_0); > + bar--; > + hi = true; > + } > + > + return bar->guest_reg >> (hi ? 32 : 0); > +} > + > +static uint32_t cf_check empty_bar_read(const struct pci_dev *pdev, > + unsigned int reg, void *data) > +{ > + return 0; > +} > + > static void cf_check rom_write( > const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data) > { > @@ -517,6 +582,7 @@ static int cf_check init_bars(struct pci_dev *pdev) > struct vpci_header *header = &pdev->vpci->header; > struct vpci_bar *bars = header->bars; > int rc; > + bool is_hwdom = is_hardware_domain(pdev->domain); > > ASSERT(rw_is_locked(&pdev->domain->pci_lock)); > > @@ -558,13 +624,12 @@ static int cf_check init_bars(struct pci_dev *pdev) > if ( i && bars[i - 1].type == VPCI_BAR_MEM64_LO ) > { > bars[i].type = VPCI_BAR_MEM64_HI; > - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, > - 4, &bars[i]); > + rc = vpci_add_register(pdev->vpci, > + is_hwdom ? vpci_hw_read32 : guest_bar_read, > + is_hwdom ? bar_write : guest_bar_write, > + reg, 4, &bars[i]); > if ( rc ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > > continue; > } > @@ -573,6 +638,17 @@ static int cf_check init_bars(struct pci_dev *pdev) > if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) > { > bars[i].type = VPCI_BAR_IO; > + > +#ifndef CONFIG_X86 > + if ( !is_hwdom ) > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + reg, 4, &bars[i]); > + if ( rc ) > + goto fail; > + } > +#endif > + > continue; > } > if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == > @@ -584,14 +660,20 @@ static int cf_check init_bars(struct pci_dev *pdev) > rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size, > (i == num_bars - 1) ? PCI_BAR_LAST : 0); > if ( rc < 0 ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > > if ( size == 0 ) > { > bars[i].type = VPCI_BAR_EMPTY; > + > + if ( !is_hwdom ) > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + reg, 4, &bars[i]); > + if ( rc ) > + goto fail; > + } > + > continue; > } > > @@ -599,34 +681,50 @@ static int cf_check init_bars(struct pci_dev *pdev) > bars[i].size = size; > bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; I think there is a need to set the BAR mem type and prefetchable bit to the guest_reg also to avoid mismatch when Guest kernel initially read the BAR’s. if ( !is_hwdom ) { bars[i].guest_reg |= bars[i].type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32 : PCI_BASE_ADDRESS_MEM_TYPE_64; bars[i].guest_reg |= bars[i].prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0; } Regards, Rahul
On 21.07.2023 12:36, Rahul Singh wrote: >> On 20 Jul 2023, at 1:32 am, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com> wrote: >> @@ -599,34 +681,50 @@ static int cf_check init_bars(struct pci_dev *pdev) >> bars[i].size = size; >> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; > > I think there is a need to set the BAR mem type and prefetchable bit to the > guest_reg also to avoid mismatch when Guest kernel initially read the BAR’s. Perhaps more generally: Shouldn't r/o bits be handed through in almost all cases? Jan
On Fri, Jul 21, 2023 at 12:50:23PM +0200, Jan Beulich wrote: > On 21.07.2023 12:36, Rahul Singh wrote: > >> On 20 Jul 2023, at 1:32 am, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com> wrote: > >> @@ -599,34 +681,50 @@ static int cf_check init_bars(struct pci_dev *pdev) > >> bars[i].size = size; > >> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; > > > > I think there is a need to set the BAR mem type and prefetchable bit to the > > guest_reg also to avoid mismatch when Guest kernel initially read the BAR’s. > > Perhaps more generally: Shouldn't r/o bits be handed through in almost > all cases? I remember in an earlier version suggesting to store the guest address, instead of the guest BAR register value. Then the flags would be unconditionally added in guest_bar_read() and we wouldn't need to worry about initializing the register. Thanks, Roger.
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index 2780fcae72..5dc9b5338b 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -457,6 +457,71 @@ static void cf_check bar_write( pci_conf_write32(pdev->sbdf, reg, val); } +static void cf_check guest_bar_write(const struct pci_dev *pdev, + unsigned int reg, uint32_t val, void *data) +{ + struct vpci_bar *bar = data; + bool hi = false; + uint64_t guest_reg = bar->guest_reg; + + if ( bar->type == VPCI_BAR_MEM64_HI ) + { + ASSERT(reg > PCI_BASE_ADDRESS_0); + bar--; + hi = true; + } + else + { + val &= PCI_BASE_ADDRESS_MEM_MASK; + val |= bar->type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32 + : PCI_BASE_ADDRESS_MEM_TYPE_64; + val |= bar->prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0; + } + + guest_reg &= ~(0xffffffffull << (hi ? 32 : 0)); + guest_reg |= (uint64_t)val << (hi ? 32 : 0); + + guest_reg &= ~(bar->size - 1) | ~PCI_BASE_ADDRESS_MEM_MASK; + + /* + * Make sure that the guest set address has the same page offset + * as the physical address on the host or otherwise things won't work as + * expected. + */ + if ( (guest_reg & (~PAGE_MASK & PCI_BASE_ADDRESS_MEM_MASK)) != + (bar->addr & ~PAGE_MASK) ) + { + gprintk(XENLOG_WARNING, + "%pp: ignored BAR %zu write attempting to change page offset\n", + &pdev->sbdf, bar - pdev->vpci->header.bars + hi); + return; + } + + bar->guest_reg = guest_reg; +} + +static uint32_t cf_check guest_bar_read(const struct pci_dev *pdev, + unsigned int reg, void *data) +{ + const struct vpci_bar *bar = data; + bool hi = false; + + if ( bar->type == VPCI_BAR_MEM64_HI ) + { + ASSERT(reg > PCI_BASE_ADDRESS_0); + bar--; + hi = true; + } + + return bar->guest_reg >> (hi ? 32 : 0); +} + +static uint32_t cf_check empty_bar_read(const struct pci_dev *pdev, + unsigned int reg, void *data) +{ + return 0; +} + static void cf_check rom_write( const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data) { @@ -517,6 +582,7 @@ static int cf_check init_bars(struct pci_dev *pdev) struct vpci_header *header = &pdev->vpci->header; struct vpci_bar *bars = header->bars; int rc; + bool is_hwdom = is_hardware_domain(pdev->domain); ASSERT(rw_is_locked(&pdev->domain->pci_lock)); @@ -558,13 +624,12 @@ static int cf_check init_bars(struct pci_dev *pdev) if ( i && bars[i - 1].type == VPCI_BAR_MEM64_LO ) { bars[i].type = VPCI_BAR_MEM64_HI; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, - 4, &bars[i]); + rc = vpci_add_register(pdev->vpci, + is_hwdom ? vpci_hw_read32 : guest_bar_read, + is_hwdom ? bar_write : guest_bar_write, + reg, 4, &bars[i]); if ( rc ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; continue; } @@ -573,6 +638,17 @@ static int cf_check init_bars(struct pci_dev *pdev) if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) { bars[i].type = VPCI_BAR_IO; + +#ifndef CONFIG_X86 + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + reg, 4, &bars[i]); + if ( rc ) + goto fail; + } +#endif + continue; } if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == @@ -584,14 +660,20 @@ static int cf_check init_bars(struct pci_dev *pdev) rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size, (i == num_bars - 1) ? PCI_BAR_LAST : 0); if ( rc < 0 ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; if ( size == 0 ) { bars[i].type = VPCI_BAR_EMPTY; + + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + reg, 4, &bars[i]); + if ( rc ) + goto fail; + } + continue; } @@ -599,34 +681,50 @@ static int cf_check init_bars(struct pci_dev *pdev) bars[i].size = size; bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, - &bars[i]); + rc = vpci_add_register(pdev->vpci, + is_hwdom ? vpci_hw_read32 : guest_bar_read, + is_hwdom ? bar_write : guest_bar_write, + reg, 4, &bars[i]); if ( rc ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; } - /* Check expansion ROM. */ - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); - if ( rc > 0 && size ) + /* Check expansion ROM: we do not handle ROM for guests. */ + if ( is_hwdom ) { - struct vpci_bar *rom = &header->bars[num_bars]; + rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); + if ( rc > 0 && size ) + { + struct vpci_bar *rom = &header->bars[num_bars]; - rom->type = VPCI_BAR_ROM; - rom->size = size; - rom->addr = addr; - header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & - PCI_ROM_ADDRESS_ENABLE; + rom->type = VPCI_BAR_ROM; + rom->size = size; + rom->addr = addr; + header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & + PCI_ROM_ADDRESS_ENABLE; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, rom_reg, - 4, rom); - if ( rc ) - rom->type = VPCI_BAR_EMPTY; + rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, + rom_reg, 4, rom); + if ( rc ) + rom->type = VPCI_BAR_EMPTY; + } + } + else + { + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + rom_reg, 4, &header->bars[num_bars]); + if ( rc ) + goto fail; + } } return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, cmd, false) : 0; + + fail: + pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); + return rc; } REGISTER_VPCI_INIT(init_bars, VPCI_PRIORITY_MIDDLE); diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 44296623e1..486a655e8d 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -67,7 +67,10 @@ struct vpci { struct vpci_header { /* Information about the PCI BARs of this device. */ struct vpci_bar { + /* Physical (host) address. */ uint64_t addr; + /* Guest view of the BAR: address and lower bits. */ + uint64_t guest_reg; uint64_t size; enum { VPCI_BAR_EMPTY,