Message ID | 20220719174253.541965-4-olekstysh@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: > @@ -527,6 +592,17 @@ static int cf_check init_bars(struct pci_dev *pdev) > if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) > { > bars[i].type = VPCI_BAR_IO; > + > +#ifndef CONFIG_X86 > + if ( !is_hwdom ) > + { > + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, > + reg, 4, &bars[i]); > + if ( rc ) > + goto fail; > + } > +#endif Since long term this can't be correct, it wants a TODO comment put next to it. > @@ -553,34 +635,47 @@ static int cf_check init_bars(struct pci_dev *pdev) > bars[i].size = size; > bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; > > - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, > - &bars[i]); > + rc = vpci_add_register(pdev->vpci, > + is_hwdom ? vpci_hw_read32 : guest_bar_read, > + is_hwdom ? bar_write : guest_bar_write, > + reg, 4, &bars[i]); > if ( rc ) > - { > - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); > - return rc; > - } > + goto fail; > } > > - /* Check expansion ROM. */ > - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); > - if ( rc > 0 && size ) > + /* Check expansion ROM: we do not handle ROM for guests. */ > + if ( is_hwdom ) This again can't be right long-term. Personally I'd prefer if the code was (largely) left as is, with adjustments (with suitable TODO comments) made on a much smaller scope only. But I'm not the maintainer of this code - Roger may have a different view on this. Jan
On 27.07.22 13:15, Jan Beulich wrote: Hello Jan > On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >> @@ -527,6 +592,17 @@ static int cf_check init_bars(struct pci_dev *pdev) >> if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) >> { >> bars[i].type = VPCI_BAR_IO; >> + >> +#ifndef CONFIG_X86 >> + if ( !is_hwdom ) >> + { >> + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, >> + reg, 4, &bars[i]); >> + if ( rc ) >> + goto fail; >> + } >> +#endif > Since long term this can't be correct, it wants a TODO comment put next > to it. Looking into the previous versions of this patch (up to V3) I failed to find any changes in the current version which hadn't been discussed (and agreed in some form). Could you please clarify what exactly can't be correct in the long term, for me to put the proper TODO here. Do you perhaps mean that the TODO needs to explain why we have to diverge? > >> @@ -553,34 +635,47 @@ static int cf_check init_bars(struct pci_dev *pdev) >> bars[i].size = size; >> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; >> >> - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, >> - &bars[i]); >> + rc = vpci_add_register(pdev->vpci, >> + is_hwdom ? vpci_hw_read32 : guest_bar_read, >> + is_hwdom ? bar_write : guest_bar_write, >> + reg, 4, &bars[i]); >> if ( rc ) >> - { >> - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); >> - return rc; >> - } >> + goto fail; >> } >> >> - /* Check expansion ROM. */ >> - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); >> - if ( rc > 0 && size ) >> + /* Check expansion ROM: we do not handle ROM for guests. */ >> + if ( is_hwdom ) > This again can't be right long-term. Personally I'd prefer if the code > was (largely) left as is, with adjustments (with suitable TODO comments) > made on a much smaller scope only. 
I can revive a comment that Oleksandr Andrushchenko provided for an earlier version by transforming it into a TODO: ROM BAR is only handled for the hardware domain and for guest domains there is a stub: at the moment PCI expansion ROM handling is supported for x86 only and it might not be used by other architectures without emulating x86. Other use-cases may include using that expansion ROM before Xen boots, hence no emulation is needed in Xen itself. Or when a guest wants to use the ROM code, which seems to be rare. > But I'm not the maintainer of this > code - Roger may have a different view on this. Well, let's wait for Roger's input here. > > Jan
On 27.07.2022 18:17, Oleksandr wrote: > On 27.07.22 13:15, Jan Beulich wrote: >> On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >>> @@ -527,6 +592,17 @@ static int cf_check init_bars(struct pci_dev *pdev) >>> if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) >>> { >>> bars[i].type = VPCI_BAR_IO; >>> + >>> +#ifndef CONFIG_X86 >>> + if ( !is_hwdom ) >>> + { >>> + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, >>> + reg, 4, &bars[i]); >>> + if ( rc ) >>> + goto fail; >>> + } >>> +#endif >> Since long term this can't be correct, it wants a TODO comment put next >> to it. > > > Looking into the previous versions of this patch (up to V3) I failed to > find any changes in current version which hadn't been discussed (and > agreed in some form). > > Could you please clarify what exactly can't be correct the long term, > for me to put the proper TODO here. Do you perhaps mean that TODO needs > to explain why we have to diverge? If a device has I/O port ranges, then that's typically for a reason. Drivers (in the guest) may therefore want to use those ranges to communicate with the device. Imagine in particular a device without any MMIO BARs, and with only I/O port one(s). >>> @@ -553,34 +635,47 @@ static int cf_check init_bars(struct pci_dev *pdev) >>> bars[i].size = size; >>> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; >>> >>> - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, >>> - &bars[i]); >>> + rc = vpci_add_register(pdev->vpci, >>> + is_hwdom ? vpci_hw_read32 : guest_bar_read, >>> + is_hwdom ? bar_write : guest_bar_write, >>> + reg, 4, &bars[i]); >>> if ( rc ) >>> - { >>> - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); >>> - return rc; >>> - } >>> + goto fail; >>> } >>> >>> - /* Check expansion ROM. */ >>> - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); >>> - if ( rc > 0 && size ) >>> + /* Check expansion ROM: we do not handle ROM for guests. 
*/ >>> + if ( is_hwdom ) >> This again can't be right long-term. Personally I'd prefer if the code >> was (largely) left as is, with adjustments (with suitable TODO comments) >> made on a much smaller scope only. > > > I can revive a comment that Oleksandr Andrushchenko provided for earlier > version by transforming into TODO: > > > ROM BAR is only handled for the hardware domain and for guest domains > there is a stub: at the moment PCI expansion ROM handling is supported > for x86 only and it might not be used by other architectures without > emulating x86. Other use-cases may include using that expansion ROM before > Xen boots, hence no emulation is needed in Xen itself. Or when a guest > wants to use the ROM code which seems to be rare. ROMs can contain other than x86 code. While reportedly mostly dead, EFI bytecode was an example of an abstraction layer supporting arbitrary architectures. Therefore a comment along these lines would be okay, but personally I'd prefer it to be less verbose - along the lines of the one to be supplied for the I/O port restriction. Jan
On 28.07.22 10:01, Jan Beulich wrote: Hello Jan > On 27.07.2022 18:17, Oleksandr wrote: >> On 27.07.22 13:15, Jan Beulich wrote: >>> On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >>>> @@ -527,6 +592,17 @@ static int cf_check init_bars(struct pci_dev *pdev) >>>> if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) >>>> { >>>> bars[i].type = VPCI_BAR_IO; >>>> + >>>> +#ifndef CONFIG_X86 >>>> + if ( !is_hwdom ) >>>> + { >>>> + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, >>>> + reg, 4, &bars[i]); >>>> + if ( rc ) >>>> + goto fail; >>>> + } >>>> +#endif >>> Since long term this can't be correct, it wants a TODO comment put next >>> to it. >> >> Looking into the previous versions of this patch (up to V3) I failed to >> find any changes in current version which hadn't been discussed (and >> agreed in some form). >> >> Could you please clarify what exactly can't be correct the long term, >> for me to put the proper TODO here. Do you perhaps mean that TODO needs >> to explain why we have to diverge? > If a device has I/O port ranges, then that's typically for a reason. > Drivers (in the guest) may therefore want to use those ranges to > communicate with the device. Imagine in particular a device without > any MMIO BARs, and with only I/O port one(s). > >>>> @@ -553,34 +635,47 @@ static int cf_check init_bars(struct pci_dev *pdev) >>>> bars[i].size = size; >>>> bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; >>>> >>>> - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, >>>> - &bars[i]); >>>> + rc = vpci_add_register(pdev->vpci, >>>> + is_hwdom ? vpci_hw_read32 : guest_bar_read, >>>> + is_hwdom ? bar_write : guest_bar_write, >>>> + reg, 4, &bars[i]); >>>> if ( rc ) >>>> - { >>>> - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); >>>> - return rc; >>>> - } >>>> + goto fail; >>>> } >>>> >>>> - /* Check expansion ROM. 
*/ >>>> - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); >>>> - if ( rc > 0 && size ) >>>> + /* Check expansion ROM: we do not handle ROM for guests. */ >>>> + if ( is_hwdom ) >>> This again can't be right long-term. Personally I'd prefer if the code >>> was (largely) left as is, with adjustments (with suitable TODO comments) >>> made on a much smaller scope only. >> >> I can revive a comment that Oleksandr Andrushchenko provided for earlier >> version by transforming into TODO: >> >> >> ROM BAR is only handled for the hardware domain and for guest domains >> there is a stub: at the moment PCI expansion ROM handling is supported >> for x86 only and it might not be used by other architectures without >> emulating x86. Other use-cases may include using that expansion ROM before >> Xen boots, hence no emulation is needed in Xen itself. Or when a guest >> wants to use the ROM code which seems to be rare. > ROMs can contain other than x86 code. While reportedly mostly dead, EFI > bytecode was an example of an abstraction layer supporting arbitrary > architectures. Therefore a comment along these lines would be okay, but > personally I'd prefer it to be less verbose - along the lines of the > one to be supplied for the I/O port restriction. Thanks for the clarification. I will add two TODOs. > > Jan
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index e0461b1139..9fbbdc3500 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -412,6 +412,71 @@ static void cf_check bar_write( pci_conf_write32(pdev->sbdf, reg, val); } +static void cf_check guest_bar_write( + const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data) +{ + struct vpci_bar *bar = data; + bool hi = false; + uint64_t guest_reg = bar->guest_reg; + + if ( bar->type == VPCI_BAR_MEM64_HI ) + { + ASSERT(reg > PCI_BASE_ADDRESS_0); + bar--; + hi = true; + } + else + { + val &= PCI_BASE_ADDRESS_MEM_MASK; + val |= bar->type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32 + : PCI_BASE_ADDRESS_MEM_TYPE_64; + val |= bar->prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0; + } + + guest_reg &= ~(0xffffffffull << (hi ? 32 : 0)); + guest_reg |= (uint64_t)val << (hi ? 32 : 0); + + guest_reg &= ~(bar->size - 1) | ~PCI_BASE_ADDRESS_MEM_MASK; + + /* + * Make sure that the guest set address has the same page offset + * as the physical address on the host or otherwise things won't work as + * expected. + */ + if ( (guest_reg & (~PAGE_MASK & PCI_BASE_ADDRESS_MEM_MASK)) != + (bar->addr & ~PAGE_MASK) ) + { + gprintk(XENLOG_WARNING, + "%pp: ignored BAR %zu write attempting to change page offset\n", + &pdev->sbdf, bar - pdev->vpci->header.bars + hi); + return; + } + + bar->guest_reg = guest_reg; +} + +static uint32_t cf_check guest_bar_read( + const struct pci_dev *pdev, unsigned int reg, void *data) +{ + const struct vpci_bar *bar = data; + bool hi = false; + + if ( bar->type == VPCI_BAR_MEM64_HI ) + { + ASSERT(reg > PCI_BASE_ADDRESS_0); + bar--; + hi = true; + } + + return bar->guest_reg >> (hi ? 
32 : 0); +} + +static uint32_t cf_check empty_bar_read( + const struct pci_dev *pdev, unsigned int reg, void *data) +{ + return 0; +} + static void cf_check rom_write( const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data) { @@ -468,6 +533,7 @@ static int cf_check init_bars(struct pci_dev *pdev) struct vpci_header *header; struct vpci_bar *bars; int rc; + bool is_hwdom = is_hardware_domain(pdev->domain); ASSERT(pcidevs_write_locked()); @@ -512,13 +578,12 @@ static int cf_check init_bars(struct pci_dev *pdev) if ( i && bars[i - 1].type == VPCI_BAR_MEM64_LO ) { bars[i].type = VPCI_BAR_MEM64_HI; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, - 4, &bars[i]); + rc = vpci_add_register(pdev->vpci, + is_hwdom ? vpci_hw_read32 : guest_bar_read, + is_hwdom ? bar_write : guest_bar_write, + reg, 4, &bars[i]); if ( rc ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; continue; } @@ -527,6 +592,17 @@ static int cf_check init_bars(struct pci_dev *pdev) if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) { bars[i].type = VPCI_BAR_IO; + +#ifndef CONFIG_X86 + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + reg, 4, &bars[i]); + if ( rc ) + goto fail; + } +#endif + continue; } if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == @@ -538,14 +614,20 @@ static int cf_check init_bars(struct pci_dev *pdev) rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size, (i == num_bars - 1) ? 
PCI_BAR_LAST : 0); if ( rc < 0 ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; if ( size == 0 ) { bars[i].type = VPCI_BAR_EMPTY; + + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + reg, 4, &bars[i]); + if ( rc ) + goto fail; + } + continue; } @@ -553,34 +635,47 @@ static int cf_check init_bars(struct pci_dev *pdev) bars[i].size = size; bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4, - &bars[i]); + rc = vpci_add_register(pdev->vpci, + is_hwdom ? vpci_hw_read32 : guest_bar_read, + is_hwdom ? bar_write : guest_bar_write, + reg, 4, &bars[i]); if ( rc ) - { - pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); - return rc; - } + goto fail; } - /* Check expansion ROM. */ - rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); - if ( rc > 0 && size ) + /* Check expansion ROM: we do not handle ROM for guests. */ + if ( is_hwdom ) { - struct vpci_bar *rom = &header->bars[num_bars]; + rc = pci_size_mem_bar(pdev->sbdf, rom_reg, &addr, &size, PCI_BAR_ROM); + if ( rc > 0 && size ) + { + struct vpci_bar *rom = &header->bars[num_bars]; - rom->type = VPCI_BAR_ROM; - rom->size = size; - rom->addr = addr; - header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & - PCI_ROM_ADDRESS_ENABLE; + rom->type = VPCI_BAR_ROM; + rom->size = size; + rom->addr = addr; + header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & + PCI_ROM_ADDRESS_ENABLE; - rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, rom_reg, - 4, rom); + rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, + rom_reg, 4, rom); + if ( rc ) + rom->type = VPCI_BAR_EMPTY; + } + } + else + { + rc = vpci_add_register(pdev->vpci, empty_bar_read, NULL, + rom_reg, 4, &header->bars[num_bars]); if ( rc ) - rom->type = VPCI_BAR_EMPTY; + goto fail; } return (cmd & PCI_COMMAND_MEMORY) ? 
modify_bars(pdev, cmd, false) : 0; + + fail: + pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd); + return rc; } REGISTER_VPCI_INIT(init_bars, VPCI_PRIORITY_MIDDLE); diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index e5501b9207..6e1d3b93cd 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -67,7 +67,10 @@ struct vpci { struct vpci_header { /* Information about the PCI BARs of this device. */ struct vpci_bar { + /* Physical (host) address. */ uint64_t addr; + /* Guest view of the BAR: address and lower bits. */ + uint64_t guest_reg; uint64_t size; enum { VPCI_BAR_EMPTY,