Message ID | 7e5a37e51303ba17dab8e6a92830257f670f3355.1741891599.git.sultanovandriy@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v1] drivers: Change amd_iommu struct to contain pci_sbdf_t, simplify code | expand |
On 2025-03-13 14:57, Andrii Sultanov wrote: > Following on from 250d87dc, struct amd_iommu has its seg and bdf fields > backwards with relation to pci_sbdf_t. Swap them around, and simplify the > expressions regenerating an sbdf_t from seg+bdf. > > Simplify ioapic_sbdf and bpet_sbdf along the way. Adjust functions > taking seg and bdf fields of these structs to take pci_sbdf_t instead. > Simplify comparisons similarly. It's rather large. Can this be sensibly split into smaller patches? > diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h > index 00e81b4b2a..6903b1bc5d 100644 > --- a/xen/drivers/passthrough/amd/iommu.h > +++ b/xen/drivers/passthrough/amd/iommu.h > @@ -77,8 +77,14 @@ struct amd_iommu { > struct list_head list; > spinlock_t lock; /* protect iommu */ > > - u16 seg; > - u16 bdf; > + union { > + struct { > + uint16_t bdf; > + uint16_t seg; Are these still needed by the end of this patch? > + }; > + pci_sbdf_t sbdf; > + }; > + > struct msi_desc msi; > > u16 cap_offset; > diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c > index 5bdbfb5ba8..57efb7ddda 100644 > --- a/xen/drivers/passthrough/amd/iommu_acpi.c > +++ b/xen/drivers/passthrough/amd/iommu_acpi.c > @@ -107,12 +107,12 @@ static void __init add_ivrs_mapping_entry( > @@ -239,17 +239,17 @@ static int __init register_range_for_device( > unsigned int bdf, paddr_t base, paddr_t limit, > bool iw, bool ir, bool exclusion) > { > - int seg = 0; /* XXX */ > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > + pci_sbdf_t sbdf = { .seg = 0, .bdf = bdf }; Maybe retain the /* XXX */ to highlight that segment is hardcoded to 0. > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > struct amd_iommu *iommu; > u16 req; > int rc = 0; > > - iommu = find_iommu_for_device(seg, bdf); > + iommu = find_iommu_for_device(sbdf); > if ( !iommu ) > { > AMD_IOMMU_WARN("IVMD: no IOMMU for device %pp - ignoring constrain\n", > - &PCI_SBDF(seg, bdf)); > + &(sbdf)); Please drop () for just &sbdf. > return 0; > } > req = ivrs_mappings[bdf].dte_requestor_id; > @@ -263,9 +263,9 @@ static int __init register_range_for_device( > paddr_t length = limit + PAGE_SIZE - base; > > /* reserve unity-mapped page entries for device */ > - rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir, > + rc = reserve_unity_map_for_device(sbdf.seg, bdf, base, length, iw, ir, Another candidate for conversion? > false) ?: > - reserve_unity_map_for_device(seg, req, base, length, iw, ir, > + reserve_unity_map_for_device(sbdf.seg, req, base, length, iw, ir, > false); > } > else > @@ -916,8 +916,8 @@ static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block) > ivhd_block->pci_segment_group, ivhd_block->info, > ivhd_block->iommu_attr); > > - iommu = find_iommu_from_bdf_cap(ivhd_block->pci_segment_group, > - ivhd_block->header.device_id, > + iommu = find_iommu_from_bdf_cap(PCI_SBDF(ivhd_block->pci_segment_group, > + ivhd_block->header.device_id), Please indent to match the end of "PCI_SBDF(". > ivhd_block->capability_offset); > if ( !iommu ) > { > diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c > index 83c525b84f..dc3d2394a1 100644 > --- a/xen/drivers/passthrough/amd/iommu_cmd.c > +++ b/xen/drivers/passthrough/amd/iommu_cmd.c > @@ -85,7 +85,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, > threshold |= threshold << 1; > printk(XENLOG_WARNING > "AMD IOMMU %pp: %scompletion wait taking too long\n", > - &PCI_SBDF(iommu->seg, iommu->bdf), > + &(iommu->sbdf), Please drop (). > timeout_base ? "iotlb " : ""); > timeout = 0; > } > @@ -95,7 +95,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, > if ( !timeout ) > printk(XENLOG_WARNING > "AMD IOMMU %pp: %scompletion wait took %lums\n", > - &PCI_SBDF(iommu->seg, iommu->bdf), > + &(iommu->sbdf), Please drop (). > timeout_base ? "iotlb " : "", > (NOW() - start) / 10000000); > } > diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c > index cede44e651..7d60389500 100644 > --- a/xen/drivers/passthrough/amd/iommu_detect.c > +++ b/xen/drivers/passthrough/amd/iommu_detect.c > @@ -231,7 +231,7 @@ int __init amd_iommu_detect_one_acpi( > rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func)); > if ( rt ) > printk(XENLOG_ERR "Could not mark config space of %pp read-only (%d)\n", > - &PCI_SBDF(iommu->seg, iommu->bdf), rt); > + &(iommu->sbdf), rt); Please drop (). > > list_add_tail(&iommu->list, &amd_iommu_head); > rt = 0; > diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c > index bb25b55c85..e2c205a857 100644 > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -752,12 +750,11 @@ static bool __init set_iommu_interrupt_handler(struct amd_iommu *iommu) > } > > pcidevs_lock(); > - iommu->msi.dev = pci_get_pdev(NULL, PCI_SBDF(iommu->seg, iommu->bdf)); > + iommu->msi.dev = pci_get_pdev(NULL, iommu->sbdf); > pcidevs_unlock(); > if ( !iommu->msi.dev ) > { > - AMD_IOMMU_WARN("no pdev for %pp\n", > - &PCI_SBDF(iommu->seg, iommu->bdf)); > + AMD_IOMMU_WARN("no pdev for %pp\n", &(iommu->sbdf)); Please drop (). > return 0; > } > > @@ -1543,14 +1540,14 @@ static void invalidate_all_domain_pages(void) > static int cf_check _invalidate_all_devices( > u16 seg, struct ivrs_mappings *ivrs_mappings) > { > - unsigned int bdf; > + pci_sbdf_t sbdf = { .seg = seg, .bdf = 0 }; .bdf = 0 isn't necessary as it will be set to 0 by default. > u16 req_id; > struct amd_iommu *iommu; > > - for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; sbdf.bdf++ ) I'd either set it or just drop the comment. > { > - iommu = find_iommu_for_device(seg, bdf); > - req_id = ivrs_mappings[bdf].dte_requestor_id; > + iommu = find_iommu_for_device(sbdf); > + req_id = ivrs_mappings[sbdf.bdf].dte_requestor_id; > if ( iommu ) > { > /* > diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c > index 9abdc38053..0c91125ec0 100644 > --- a/xen/drivers/passthrough/amd/iommu_intr.c > +++ b/xen/drivers/passthrough/amd/iommu_intr.c > diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c > index dde393645a..17070904fa 100644 > --- a/xen/drivers/passthrough/amd/iommu_map.c > +++ b/xen/drivers/passthrough/amd/iommu_map.c > @@ -694,17 +694,16 @@ int amd_iommu_reserve_domain_unity_unmap(struct domain *d, > int cf_check amd_iommu_get_reserved_device_memory( > iommu_grdm_t *func, void *ctxt) > { > - unsigned int seg = 0 /* XXX */, bdf; > - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > + pci_sbdf_t sbdf = {0}; Just " = {};" > + const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > /* At least for global entries, avoid reporting them multiple times. */ > enum { pending, processing, done } global = pending; > > - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; ++sbdf.bdf ) Like earlier, change to code or remove comment. > { > - pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); > - const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map; > - unsigned int req = ivrs_mappings[bdf].dte_requestor_id; > - const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu; > + const struct ivrs_unity_map *um = ivrs_mappings[sbdf.bdf].unity_map; > + unsigned int req = ivrs_mappings[sbdf.bdf].dte_requestor_id; > + const struct amd_iommu *iommu = ivrs_mappings[sbdf.bdf].iommu; > int rc; > > if ( !iommu ) > diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c > index d00697edb3..16bab0f948 100644 > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c > @@ -32,35 +32,35 @@ static bool __read_mostly init_done; > > static const struct iommu_init_ops _iommu_init_ops; > > -struct amd_iommu *find_iommu_for_device(int seg, int bdf) > +struct amd_iommu *find_iommu_for_device(pci_sbdf_t sbdf) > { > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); Adding: unsigned int bdf = sbdf.bdf here would eliminate all the sbdf.bdf use below. Thanks, Jason > > - if ( !ivrs_mappings || bdf >= ivrs_bdf_entries ) > + if ( !ivrs_mappings || sbdf.bdf >= ivrs_bdf_entries ) > return NULL; > > - if ( unlikely(!ivrs_mappings[bdf].iommu) && likely(init_done) ) > + if ( unlikely(!ivrs_mappings[sbdf.bdf].iommu) && likely(init_done) ) > { > - unsigned int bd0 = bdf & ~PCI_FUNC(~0); > + unsigned int bd0 = sbdf.bdf & ~PCI_FUNC(~0); > > - if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != bdf ) > + if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != sbdf.bdf ) > { > struct ivrs_mappings tmp = ivrs_mappings[bd0]; > > tmp.iommu = NULL; > if ( tmp.dte_requestor_id == bd0 ) > - tmp.dte_requestor_id = bdf; > - ivrs_mappings[bdf] = tmp; > + tmp.dte_requestor_id = sbdf.bdf; > + ivrs_mappings[sbdf.bdf] = tmp; > > printk(XENLOG_WARNING "%pp not found in ACPI tables;" > - " using same IOMMU as function 0\n", &PCI_SBDF(seg, bdf)); > + " using same IOMMU as function 0\n", &sbdf); > > /* write iommu field last */ > - ivrs_mappings[bdf].iommu = ivrs_mappings[bd0].iommu; > + ivrs_mappings[sbdf.bdf].iommu = ivrs_mappings[bd0].iommu; > } > } > > - return ivrs_mappings[bdf].iommu; > + return ivrs_mappings[sbdf.bdf].iommu; > } > > /*
On 13/03/2025 6:57 pm, Andrii Sultanov wrote: > Following on from 250d87dc, struct amd_iommu has its seg and bdf fields > backwards with relation to pci_sbdf_t. Swap them around, and simplify the > expressions regenerating an sbdf_t from seg+bdf. > > Simplify ioapic_sbdf and bpet_sbdf along the way. Adjust functions > taking seg and bdf fields of these structs to take pci_sbdf_t instead. > Simplify comparisons similarly. > > Bloat-o-meter reports: > > ``` > add/remove: 0/0 grow/shrink: 13/21 up/down: 352/-576 (-224) > Function old new delta > _einittext 22028 22220 +192 > amd_iommu_prepare 853 897 +44 > _invalidate_all_devices 133 154 +21 > amd_iommu_domain_destroy 46 63 +17 > disable_fmt 12336 12352 +16 > _acpi_module_name 416 432 +16 > amd_iommu_get_reserved_device_memory 521 536 +15 > parse_ivrs_table 3955 3966 +11 > amd_iommu_assign_device 271 282 +11 > find_iommu_for_device 242 246 +4 > get_intremap_requestor_id 49 52 +3 > amd_iommu_free_intremap_table 360 361 +1 > allocate_domain_resources 82 83 +1 > reassign_device 843 838 -5 > amd_iommu_remove_device 352 347 -5 > amd_iommu_flush_iotlb 359 354 -5 > iov_supports_xt 270 264 -6 > amd_iommu_setup_domain_device 1478 1472 -6 > amd_setup_hpet_msi 232 224 -8 > amd_iommu_ioapic_update_ire 572 564 -8 > _hvm_dpci_msi_eoi 157 149 -8 > amd_iommu_msi_enable 33 20 -13 > register_range_for_device 297 281 -16 > amd_iommu_add_device 856 839 -17 > update_intremap_entry_from_msi_msg 879 861 -18 > amd_iommu_read_ioapic_from_ire 347 323 -24 > amd_iommu_msi_msg_update_ire 472 431 -41 > flush_command_buffer 460 417 -43 > set_iommu_interrupt_handler 421 377 -44 > amd_iommu_detect_one_acpi 918 868 -50 > amd_iommu_get_supported_ivhd_type 86 31 -55 > iterate_ivrs_mappings 169 113 -56 > parse_ivmd_block 1339 1271 -68 > enable_iommu 1745 1665 -80 > ``` > > Resolves: https://gitlab.com/xen-project/xen/-/issues/198 > > Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> > Signed-off-by: Andrii Sultanov <sultanovandriy@gmail.com> Well, this is awkward. This is the task I'd put together for Cody to try. I guess I have to find another one. In commit messages, we always want the subject alongside a hash. I have this local alias to help: > xen.git/xen$ git commit-str 250d87dc > commit 250d87dc3ff9 ("x86/msi: Change __msi_set_enable() to take > pci_sbdf_t") > xen.git/xen$ git help commit-str > 'commit-str' is aliased to 'log -1 --abbrev=12 --pretty=format:'commit > %h ("%s")'' (The name is not imaginative, and could probably be better.) > @@ -239,17 +239,17 @@ static int __init register_range_for_device( > unsigned int bdf, paddr_t base, paddr_t limit, > bool iw, bool ir, bool exclusion) > { > - int seg = 0; /* XXX */ > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > + pci_sbdf_t sbdf = { .seg = 0, .bdf = bdf }; The /* XXX */ wants to stay. It's highlighting that this code isn't muti-segment aware (yet). > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > struct amd_iommu *iommu; > u16 req; > int rc = 0; > > - iommu = find_iommu_for_device(seg, bdf); > + iommu = find_iommu_for_device(sbdf); > if ( !iommu ) > { > AMD_IOMMU_WARN("IVMD: no IOMMU for device %pp - ignoring constrain\n", > - &PCI_SBDF(seg, bdf)); > + &(sbdf)); The brackets should be dropped now. This should be just &sbdf. > @@ -1543,14 +1540,14 @@ static void invalidate_all_domain_pages(void) > static int cf_check _invalidate_all_devices( > u16 seg, struct ivrs_mappings *ivrs_mappings) > { > - unsigned int bdf; > + pci_sbdf_t sbdf = { .seg = seg, .bdf = 0 }; > u16 req_id; > struct amd_iommu *iommu; > > - for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; sbdf.bdf++ ) > { > - iommu = find_iommu_for_device(seg, bdf); > - req_id = ivrs_mappings[bdf].dte_requestor_id; > + iommu = find_iommu_for_device(sbdf); > + req_id = ivrs_mappings[sbdf.bdf].dte_requestor_id; See how bloat-o-meter reports this as the 3rd most increased function. This is an example where merging to a pci_sbdf_t local variable is making things worse. Keep the bdf local variable, and use PCI_SBDF() for the call to find_iommu_for_device(). The reason is that you're now modifying the low uint16_t half of a uint32_t. This requires emitting 16-bit logic (requires the Operand Size Override prefix, contributing to your code size), it also suffers register merge penalty > diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c > index 9abdc38053..0c91125ec0 100644 > --- a/xen/drivers/passthrough/amd/iommu_intr.c > +++ b/xen/drivers/passthrough/amd/iommu_intr.c > @@ -123,10 +123,10 @@ static spinlock_t* get_intremap_lock(int seg, int req_id) > &shared_intremap_lock); > } > > -static int get_intremap_requestor_id(int seg, int bdf) > +static int get_intremap_requestor_id(pci_sbdf_t sbdf) > { > - ASSERT( bdf < ivrs_bdf_entries ); > - return get_ivrs_mappings(seg)[bdf].dte_requestor_id; > + ASSERT( sbdf.bdf < ivrs_bdf_entries ); > + return get_ivrs_mappings(sbdf.seg)[sbdf.bdf].dte_requestor_id; This is also an example where merging the parameter is not necessarily wise. The segment and bdf parts are used differently, so this function now has to split the one parameter in two, and shift segment by 16 just to use it. > @@ -335,20 +336,19 @@ void cf_check amd_iommu_ioapic_update_ire( > new_rte.raw = rte; > > /* get device id of ioapic devices */ > - bdf = ioapic_sbdf[idx].bdf; > - seg = ioapic_sbdf[idx].seg; > - iommu = find_iommu_for_device(seg, bdf); > + sbdf.sbdf = ioapic_sbdf[idx].sbdf.sbdf; sbdf = ioapic_sbdf[idx].sbdf; > + iommu = find_iommu_for_device(sbdf); > if ( !iommu ) > { > AMD_IOMMU_WARN("failed to find IOMMU for IO-APIC @ %04x:%04x\n", > - seg, bdf); > + sbdf.seg, sbdf.bdf); This should be converted to %pp, which has a side effect of correcting the rendering of bdf. > @@ -515,15 +515,15 @@ int cf_check amd_iommu_msi_msg_update_ire( > struct msi_desc *msi_desc, struct msi_msg *msg) > { > struct pci_dev *pdev = msi_desc->dev; > - int bdf, seg, rc; > + pci_sbdf_t sbdf; > + int rc; > struct amd_iommu *iommu; > unsigned int i, nr = 1; > u32 data; > > - bdf = pdev ? pdev->sbdf.bdf : hpet_sbdf.bdf; > - seg = pdev ? pdev->seg : hpet_sbdf.seg; > + sbdf.sbdf = pdev ? pdev->sbdf.sbdf : hpet_sbdf.sbdf.sbdf; This is a better example where sbdf = pdev ? pdev->sbdf : hpet_sbdf.sbdf; is equivalent. > diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c > index dde393645a..17070904fa 100644 > --- a/xen/drivers/passthrough/amd/iommu_map.c > +++ b/xen/drivers/passthrough/amd/iommu_map.c > @@ -694,17 +694,16 @@ int amd_iommu_reserve_domain_unity_unmap(struct domain *d, > int cf_check amd_iommu_get_reserved_device_memory( > iommu_grdm_t *func, void *ctxt) > { > - unsigned int seg = 0 /* XXX */, bdf; > - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > + pci_sbdf_t sbdf = {0}; "= {}" please. GCC has just introduced a nasty bug (they claim its a feature) where {0} on unions now zeros less than it used to do. pci_sbdf_t doesn't tickle this corner case, but we need to be proactive about removing examples of "= {0}". > + const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > /* At least for global entries, avoid reporting them multiple times. */ > enum { pending, processing, done } global = pending; > > - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; ++sbdf.bdf ) > { > - pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); > - const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map; > - unsigned int req = ivrs_mappings[bdf].dte_requestor_id; > - const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu; > + const struct ivrs_unity_map *um = ivrs_mappings[sbdf.bdf].unity_map; > + unsigned int req = ivrs_mappings[sbdf.bdf].dte_requestor_id; > + const struct amd_iommu *iommu = ivrs_mappings[sbdf.bdf].iommu; Again, this will be better staying as two split variables. ~Andrew
On Thu, 13 Mar 2025 at 19:59, Jason Andryuk <jason.andryuk@amd.com> wrote: > > On 2025-03-13 14:57, Andrii Sultanov wrote: > > Following on from 250d87dc, struct amd_iommu has its seg and bdf fields > > backwards with relation to pci_sbdf_t. Swap them around, and simplify the > > expressions regenerating an sbdf_t from seg+bdf. > > > > Simplify ioapic_sbdf and bpet_sbdf along the way. Adjust functions > > taking seg and bdf fields of these structs to take pci_sbdf_t instead. > > Simplify comparisons similarly. > > It's rather large. Can this be sensibly split into smaller patches? Will do. > > diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h > > index 00e81b4b2a..6903b1bc5d 100644 > > --- a/xen/drivers/passthrough/amd/iommu.h > > +++ b/xen/drivers/passthrough/amd/iommu.h > > @@ -77,8 +77,14 @@ struct amd_iommu { > > struct list_head list; > > spinlock_t lock; /* protect iommu */ > > > > - u16 seg; > > - u16 bdf; > > + union { > > + struct { > > + uint16_t bdf; > > + uint16_t seg; > > Are these still needed by the end of this patch? Yes - otherwise the patch would be larger as bdf and seg would be one namespace deeper - /iommu->seg/iommu->sbdf.seg/ > > + }; > > + pci_sbdf_t sbdf; > > + }; > > + > > struct msi_desc msi; > > > > u16 cap_offset; > > > diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c > > index 5bdbfb5ba8..57efb7ddda 100644 > > --- a/xen/drivers/passthrough/amd/iommu_acpi.c > > +++ b/xen/drivers/passthrough/amd/iommu_acpi.c > > @@ -107,12 +107,12 @@ static void __init add_ivrs_mapping_entry( > > > @@ -239,17 +239,17 @@ static int __init register_range_for_device( > > unsigned int bdf, paddr_t base, paddr_t limit, > > bool iw, bool ir, bool exclusion) > > { > > - int seg = 0; /* XXX */ > > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > > + pci_sbdf_t sbdf = { .seg = 0, .bdf = bdf }; > > Maybe retain the /* XXX */ to highlight that segment is hardcoded to 0. Will do > > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > > struct amd_iommu *iommu; > > u16 req; > > int rc = 0; > > > > - iommu = find_iommu_for_device(seg, bdf); > > + iommu = find_iommu_for_device(sbdf); > > if ( !iommu ) > > { > > AMD_IOMMU_WARN("IVMD: no IOMMU for device %pp - ignoring constrain\n", > > - &PCI_SBDF(seg, bdf)); > > + &(sbdf)); > > Please drop () for just &sbdf. Will do here and below. > > return 0; > > } > > req = ivrs_mappings[bdf].dte_requestor_id; > > @@ -263,9 +263,9 @@ static int __init register_range_for_device( > > paddr_t length = limit + PAGE_SIZE - base; > > > > /* reserve unity-mapped page entries for device */ > > - rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir, > > + rc = reserve_unity_map_for_device(sbdf.seg, bdf, base, length, iw, ir, > > Another candidate for conversion? This function is always used with seg and bdf coming from two different places, and it doesn't convert them to pci_sbdf_t internally, so this is unnecessary and would only increase code size.* * This particular example would be neutral: add/remove: 0/0 grow/shrink: 3/3 up/down: 51/-51 (0) Function old new delta parse_ivmd_block 1271 1296 +25 register_range_for_device 281 299 +18 __mon_lengths 2928 2936 +8 build_info 752 744 -8 parse_ivrs_table 3966 3953 -13 reserve_unity_map_for_device 453 423 -30 But would still increase the diff. > > false) ?: > > - reserve_unity_map_for_device(seg, req, base, length, iw, ir, > > + reserve_unity_map_for_device(sbdf.seg, req, base, length, iw, ir, > > false); > > } > > else > > > @@ -916,8 +916,8 @@ static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block) > > ivhd_block->pci_segment_group, ivhd_block->info, > > ivhd_block->iommu_attr); > > > > - iommu = find_iommu_from_bdf_cap(ivhd_block->pci_segment_group, > > - ivhd_block->header.device_id, > > + iommu = find_iommu_from_bdf_cap(PCI_SBDF(ivhd_block->pci_segment_group, > > + ivhd_block->header.device_id), > > Please indent to match the end of "PCI_SBDF(". Will do. > > ivhd_block->capability_offset); > > if ( !iommu ) > > { > > diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c > > index 83c525b84f..dc3d2394a1 100644 > > --- a/xen/drivers/passthrough/amd/iommu_cmd.c > > +++ b/xen/drivers/passthrough/amd/iommu_cmd.c > > @@ -85,7 +85,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, > > threshold |= threshold << 1; > > printk(XENLOG_WARNING > > "AMD IOMMU %pp: %scompletion wait taking too long\n", > > - &PCI_SBDF(iommu->seg, iommu->bdf), > > + &(iommu->sbdf), > > Please drop (). > > > timeout_base ? "iotlb " : ""); > > timeout = 0; > > } > > @@ -95,7 +95,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, > > if ( !timeout ) > > printk(XENLOG_WARNING > > "AMD IOMMU %pp: %scompletion wait took %lums\n", > > - &PCI_SBDF(iommu->seg, iommu->bdf), > > + &(iommu->sbdf), > > Please drop (). > > > timeout_base ? "iotlb " : "", > > (NOW() - start) / 10000000); > > } > > > diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c > > index cede44e651..7d60389500 100644 > > --- a/xen/drivers/passthrough/amd/iommu_detect.c > > +++ b/xen/drivers/passthrough/amd/iommu_detect.c > > @@ -231,7 +231,7 @@ int __init amd_iommu_detect_one_acpi( > > rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func)); > > if ( rt ) > > printk(XENLOG_ERR "Could not mark config space of %pp read-only (%d)\n", > > - &PCI_SBDF(iommu->seg, iommu->bdf), rt); > > + &(iommu->sbdf), rt); > > Please drop (). > > > > > list_add_tail(&iommu->list, &amd_iommu_head); > > rt = 0; > > diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c > > index bb25b55c85..e2c205a857 100644 > > --- a/xen/drivers/passthrough/amd/iommu_init.c > > +++ b/xen/drivers/passthrough/amd/iommu_init.c > > > @@ -752,12 +750,11 @@ static bool __init set_iommu_interrupt_handler(struct amd_iommu *iommu) > > } > > > > pcidevs_lock(); > > - iommu->msi.dev = pci_get_pdev(NULL, PCI_SBDF(iommu->seg, iommu->bdf)); > > + iommu->msi.dev = pci_get_pdev(NULL, iommu->sbdf); > > pcidevs_unlock(); > > if ( !iommu->msi.dev ) > > { > > - AMD_IOMMU_WARN("no pdev for %pp\n", > > - &PCI_SBDF(iommu->seg, iommu->bdf)); > > + AMD_IOMMU_WARN("no pdev for %pp\n", &(iommu->sbdf)); > > Please drop (). > > > return 0; > > } > > > > > > @@ -1543,14 +1540,14 @@ static void invalidate_all_domain_pages(void) > > static int cf_check _invalidate_all_devices( > > u16 seg, struct ivrs_mappings *ivrs_mappings) > > { > > - unsigned int bdf; > > + pci_sbdf_t sbdf = { .seg = seg, .bdf = 0 }; > > .bdf = 0 isn't necessary as it will be set to 0 by default. > > u16 req_id; > > struct amd_iommu *iommu; > > > > - for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; sbdf.bdf++ ) > > I'd either set it or just drop the comment. Will drop _invalidate_all_devices hunk entirely, as suggested by Andrew in the sibling reply. > > { > > - iommu = find_iommu_for_device(seg, bdf); > > - req_id = ivrs_mappings[bdf].dte_requestor_id; > > + iommu = find_iommu_for_device(sbdf); > > + req_id = ivrs_mappings[sbdf.bdf].dte_requestor_id; > > if ( iommu ) > > { > > /* > > diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c > > index 9abdc38053..0c91125ec0 100644 > > --- a/xen/drivers/passthrough/amd/iommu_intr.c > > +++ b/xen/drivers/passthrough/amd/iommu_intr.c > > > diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c > > index dde393645a..17070904fa 100644 > > --- a/xen/drivers/passthrough/amd/iommu_map.c > > +++ b/xen/drivers/passthrough/amd/iommu_map.c > > @@ -694,17 +694,16 @@ int amd_iommu_reserve_domain_unity_unmap(struct domain *d, > > int cf_check amd_iommu_get_reserved_device_memory( > > iommu_grdm_t *func, void *ctxt) > > { > > - unsigned int seg = 0 /* XXX */, bdf; > > - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > > + pci_sbdf_t sbdf = {0}; > > Just " = {};" > > > + const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > > /* At least for global entries, avoid reporting them multiple times. */ > > enum { pending, processing, done } global = pending; > > > > - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) > > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; ++sbdf.bdf ) > > Like earlier, change to code or remove comment. > > > { > > - pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); > > - const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map; > > - unsigned int req = ivrs_mappings[bdf].dte_requestor_id; > > - const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu; > > + const struct ivrs_unity_map *um = ivrs_mappings[sbdf.bdf].unity_map; > > + unsigned int req = ivrs_mappings[sbdf.bdf].dte_requestor_id; > > + const struct amd_iommu *iommu = ivrs_mappings[sbdf.bdf].iommu; > > int rc; > > > > if ( !iommu ) Will drop the entire amd_iommu_get_reserved_device_memory hunk as suggested by Andrew. > > diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c > > index d00697edb3..16bab0f948 100644 > > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c > > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c > > @@ -32,35 +32,35 @@ static bool __read_mostly init_done; > > > > static const struct iommu_init_ops _iommu_init_ops; > > > > -struct amd_iommu *find_iommu_for_device(int seg, int bdf) > > +struct amd_iommu *find_iommu_for_device(pci_sbdf_t sbdf) > > { > > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > > Adding: > unsigned int bdf = sbdf.bdf > > here would eliminate all the sbdf.bdf use below. > > Thanks, > Jason > > > > > - if ( !ivrs_mappings || bdf >= ivrs_bdf_entries ) > > + if ( !ivrs_mappings || sbdf.bdf >= ivrs_bdf_entries ) > > return NULL; > > > > - if ( unlikely(!ivrs_mappings[bdf].iommu) && likely(init_done) ) > > + if ( unlikely(!ivrs_mappings[sbdf.bdf].iommu) && likely(init_done) ) > > { > > - unsigned int bd0 = bdf & ~PCI_FUNC(~0); > > + unsigned int bd0 = sbdf.bdf & ~PCI_FUNC(~0); > > > > - if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != bdf ) > > + if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != sbdf.bdf ) > > { > > struct ivrs_mappings tmp = ivrs_mappings[bd0]; > > > > tmp.iommu = NULL; > > if ( tmp.dte_requestor_id == bd0 ) > > - tmp.dte_requestor_id = bdf; > > - ivrs_mappings[bdf] = tmp; > > + tmp.dte_requestor_id = sbdf.bdf; > > + ivrs_mappings[sbdf.bdf] = tmp; > > > > printk(XENLOG_WARNING "%pp not found in ACPI tables;" > > - " using same IOMMU as function 0\n", &PCI_SBDF(seg, bdf)); > > + " using same IOMMU as function 0\n", &sbdf); > > > > /* write iommu field last */ > > - ivrs_mappings[bdf].iommu = ivrs_mappings[bd0].iommu; > > + ivrs_mappings[sbdf.bdf].iommu = ivrs_mappings[bd0].iommu; > > } > > } > > > > - return ivrs_mappings[bdf].iommu; > > + return ivrs_mappings[sbdf.bdf].iommu; > > } > > > > /* Thank you!
On Thu, 13 Mar 2025 at 20:16, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > On 13/03/2025 6:57 pm, Andrii Sultanov wrote: > > Following on from 250d87dc, struct amd_iommu has its seg and bdf fields > > backwards with relation to pci_sbdf_t. Swap them around, and simplify the > > expressions regenerating an sbdf_t from seg+bdf. > > > > Simplify ioapic_sbdf and bpet_sbdf along the way. Adjust functions > > taking seg and bdf fields of these structs to take pci_sbdf_t instead. > > Simplify comparisons similarly. > > > > Bloat-o-meter reports: > > > > ``` > > add/remove: 0/0 grow/shrink: 13/21 up/down: 352/-576 (-224) > > Function old new delta > > _einittext 22028 22220 +192 > > amd_iommu_prepare 853 897 +44 > > _invalidate_all_devices 133 154 +21 > > amd_iommu_domain_destroy 46 63 +17 > > disable_fmt 12336 12352 +16 > > _acpi_module_name 416 432 +16 > > amd_iommu_get_reserved_device_memory 521 536 +15 > > parse_ivrs_table 3955 3966 +11 > > amd_iommu_assign_device 271 282 +11 > > find_iommu_for_device 242 246 +4 > > get_intremap_requestor_id 49 52 +3 > > amd_iommu_free_intremap_table 360 361 +1 > > allocate_domain_resources 82 83 +1 > > reassign_device 843 838 -5 > > amd_iommu_remove_device 352 347 -5 > > amd_iommu_flush_iotlb 359 354 -5 > > iov_supports_xt 270 264 -6 > > amd_iommu_setup_domain_device 1478 1472 -6 > > amd_setup_hpet_msi 232 224 -8 > > amd_iommu_ioapic_update_ire 572 564 -8 > > _hvm_dpci_msi_eoi 157 149 -8 > > amd_iommu_msi_enable 33 20 -13 > > register_range_for_device 297 281 -16 > > amd_iommu_add_device 856 839 -17 > > update_intremap_entry_from_msi_msg 879 861 -18 > > amd_iommu_read_ioapic_from_ire 347 323 -24 > > amd_iommu_msi_msg_update_ire 472 431 -41 > > flush_command_buffer 460 417 -43 > > set_iommu_interrupt_handler 421 377 -44 > > amd_iommu_detect_one_acpi 918 868 -50 > > amd_iommu_get_supported_ivhd_type 86 31 -55 > > iterate_ivrs_mappings 169 113 -56 > > parse_ivmd_block 1339 1271 -68 > > enable_iommu 1745 1665 -80 > > ``` > > > > Resolves: https://gitlab.com/xen-project/xen/-/issues/198 > > > > Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> > > Signed-off-by: Andrii Sultanov <sultanovandriy@gmail.com> > > Well, this is awkward. This is the task I'd put together for Cody to > try. I guess I have to find another one. My apologies! Just noticed it wasn't claimed on Gitlab, probably should have pinged you first. > In commit messages, we always want the subject alongside a hash. I have > this local alias to help: > > > xen.git/xen$ git commit-str 250d87dc > > commit 250d87dc3ff9 ("x86/msi: Change __msi_set_enable() to take > > pci_sbdf_t") > > xen.git/xen$ git help commit-str > > 'commit-str' is aliased to 'log -1 --abbrev=12 --pretty=format:'commit > > %h ("%s")'' > > (The name is not imaginative, and could probably be better.) Thanks, will amend. > > @@ -239,17 +239,17 @@ static int __init register_range_for_device( > > unsigned int bdf, paddr_t base, paddr_t limit, > > bool iw, bool ir, bool exclusion) > > { > > - int seg = 0; /* XXX */ > > - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > > + pci_sbdf_t sbdf = { .seg = 0, .bdf = bdf }; > > The /* XXX */ wants to stay. It's highlighting that this code isn't > muti-segment aware (yet). Will do. > > + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > > struct amd_iommu *iommu; > > u16 req; > > int rc = 0; > > > > - iommu = find_iommu_for_device(seg, bdf); > > + iommu = find_iommu_for_device(sbdf); > > if ( !iommu ) > > { > > AMD_IOMMU_WARN("IVMD: no IOMMU for device %pp - ignoring constrain\n", > > - &PCI_SBDF(seg, bdf)); > > + &(sbdf)); > > The brackets should be dropped now. This should be just &sbdf. Will do > > @@ -1543,14 +1540,14 @@ static void invalidate_all_domain_pages(void) > > static int cf_check _invalidate_all_devices( > > u16 seg, struct ivrs_mappings *ivrs_mappings) > > { > > - unsigned int bdf; > > + pci_sbdf_t sbdf = { .seg = seg, .bdf = 0 }; > > u16 req_id; > > struct amd_iommu *iommu; > > > > - for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; sbdf.bdf++ ) > > { > > - iommu = find_iommu_for_device(seg, bdf); > > - req_id = ivrs_mappings[bdf].dte_requestor_id; > > + iommu = find_iommu_for_device(sbdf); > > + req_id = ivrs_mappings[sbdf.bdf].dte_requestor_id; > > See how bloat-o-meter reports this as the 3rd most increased function. > This is an example where merging to a pci_sbdf_t local variable is > making things worse. > > Keep the bdf local variable, and use PCI_SBDF() for the call to > find_iommu_for_device(). > > The reason is that you're now modifying the low uint16_t half of a > uint32_t. This requires emitting 16-bit logic (requires the Operand > Size Override prefix, contributing to your code size), it also suffers > register merge penalty This particular example would be equivalent: add/remove: 0/0 grow/shrink: 2/2 up/down: 24/-24 (0) Function old new delta _invalidate_all_devices 138 154 +16 build_info 744 752 +8 __mon_lengths 2936 2928 -8 iterate_ivrs_mappings 129 113 -16 Will drop the hunk anyhow to reduce the size of diff. Thanks! > > diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c > > index 9abdc38053..0c91125ec0 100644 > > --- a/xen/drivers/passthrough/amd/iommu_intr.c > > +++ b/xen/drivers/passthrough/amd/iommu_intr.c > > @@ -123,10 +123,10 @@ static spinlock_t* get_intremap_lock(int seg, int req_id) > > &shared_intremap_lock); > > } > > > > -static int get_intremap_requestor_id(int seg, int bdf) > > +static int get_intremap_requestor_id(pci_sbdf_t sbdf) > > { > > - ASSERT( bdf < ivrs_bdf_entries ); > > - return get_ivrs_mappings(seg)[bdf].dte_requestor_id; > > + ASSERT( sbdf.bdf < ivrs_bdf_entries ); > > + return get_ivrs_mappings(sbdf.seg)[sbdf.bdf].dte_requestor_id; > > This is also an example where merging the parameter is not necessarily > wise. The segment and bdf parts are used differently, so this function > now has to split the one parameter in two, and shift segment by 16 just > to use it. Will do: add/remove: 0/0 grow/shrink: 4/6 up/down: 32/-64 (-32) > > @@ -335,20 +336,19 @@ void cf_check amd_iommu_ioapic_update_ire( > > new_rte.raw = rte; > > > > /* get device id of ioapic devices */ > > - bdf = ioapic_sbdf[idx].bdf; > > - seg = ioapic_sbdf[idx].seg; > > - iommu = find_iommu_for_device(seg, bdf); > > + sbdf.sbdf = ioapic_sbdf[idx].sbdf.sbdf; > > sbdf = ioapic_sbdf[idx].sbdf; > > > + iommu = find_iommu_for_device(sbdf); > > if ( !iommu ) > > { > > AMD_IOMMU_WARN("failed to find IOMMU for IO-APIC @ %04x:%04x\n", > > - seg, bdf); > > + sbdf.seg, sbdf.bdf); > > This should be converted to %pp, which has a side effect of correcting > the rendering of bdf. > > > @@ -515,15 +515,15 @@ int cf_check amd_iommu_msi_msg_update_ire( > > struct msi_desc *msi_desc, struct msi_msg *msg) > > { > > struct pci_dev *pdev = msi_desc->dev; > > - int bdf, seg, rc; > > + pci_sbdf_t sbdf; > > + int rc; > > struct amd_iommu *iommu; > > unsigned int i, nr = 1; > > u32 data; > > > > - bdf = pdev ? pdev->sbdf.bdf : hpet_sbdf.bdf; > > - seg = pdev ? pdev->seg : hpet_sbdf.seg; > > + sbdf.sbdf = pdev ? pdev->sbdf.sbdf : hpet_sbdf.sbdf.sbdf; > > This is a better example where > > sbdf = pdev ? pdev->sbdf : hpet_sbdf.sbdf; > > is equivalent. > > > diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c > > index dde393645a..17070904fa 100644 > > --- a/xen/drivers/passthrough/amd/iommu_map.c > > +++ b/xen/drivers/passthrough/amd/iommu_map.c > > @@ -694,17 +694,16 @@ int amd_iommu_reserve_domain_unity_unmap(struct domain *d, > > int cf_check amd_iommu_get_reserved_device_memory( > > iommu_grdm_t *func, void *ctxt) > > { > > - unsigned int seg = 0 /* XXX */, bdf; > > - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); > > + pci_sbdf_t sbdf = {0}; > > "= {}" please. > > GCC has just introduced a nasty bug (they claim its a feature) where {0} > on unions now zeros less than it used to do. pci_sbdf_t doesn't tickle > this corner case, but we need to be proactive about removing examples of > "= {0}". Will amend with all of these. > > + const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); > > /* At least for global entries, avoid reporting them multiple times. */ > > enum { pending, processing, done } global = pending; > > > > - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) > > + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; ++sbdf.bdf ) > > { > > - pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); > > - const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map; > > - unsigned int req = ivrs_mappings[bdf].dte_requestor_id; > > - const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu; > > + const struct ivrs_unity_map *um = ivrs_mappings[sbdf.bdf].unity_map; > > + unsigned int req = ivrs_mappings[sbdf.bdf].dte_requestor_id; > > + const struct amd_iommu *iommu = ivrs_mappings[sbdf.bdf].iommu; > > Again, this will be better staying as two split variables. Indeed (on top of the previous similar suggestion): add/remove: 0/0 grow/shrink: 4/9 up/down: 32/-128 (-96) > ~Andrew Thank you!
On 14.03.2025 09:07, Andriy Sultanov wrote: > On Thu, 13 Mar 2025 at 19:59, Jason Andryuk <jason.andryuk@amd.com> wrote: >> On 2025-03-13 14:57, Andrii Sultanov wrote: >>> --- a/xen/drivers/passthrough/amd/iommu.h >>> +++ b/xen/drivers/passthrough/amd/iommu.h >>> @@ -77,8 +77,14 @@ struct amd_iommu { >>> struct list_head list; >>> spinlock_t lock; /* protect iommu */ >>> >>> - u16 seg; >>> - u16 bdf; >>> + union { >>> + struct { >>> + uint16_t bdf; >>> + uint16_t seg; >> >> Are these still needed by the end of this patch? > > Yes - otherwise the patch would be larger as bdf and seg would be one > namespace deeper - /iommu->seg/iommu->sbdf.seg/ This kind of union is fragile. Hence we want to avoid it, even if this means an overall larger diff. As Jason has suggested, it may help reviewability if you split things some. Jan
On 14/03/2025 8:56 am, Jan Beulich wrote: > On 14.03.2025 09:07, Andriy Sultanov wrote: >> On Thu, 13 Mar 2025 at 19:59, Jason Andryuk <jason.andryuk@amd.com> wrote: >>> On 2025-03-13 14:57, Andrii Sultanov wrote: >>>> --- a/xen/drivers/passthrough/amd/iommu.h >>>> +++ b/xen/drivers/passthrough/amd/iommu.h >>>> @@ -77,8 +77,14 @@ struct amd_iommu { >>>> struct list_head list; >>>> spinlock_t lock; /* protect iommu */ >>>> >>>> - u16 seg; >>>> - u16 bdf; >>>> + union { >>>> + struct { >>>> + uint16_t bdf; >>>> + uint16_t seg; >>> Are these still needed by the end of this patch? >> Yes - otherwise the patch would be larger as bdf and seg would be one >> namespace deeper - /iommu->seg/iommu->sbdf.seg/ > This kind of union is fragile. Hence we want to avoid it, even if this means > an overall larger diff. This is my suggestion, and it's the pattern used in struct pci_dev. pci_sbdf_t is nice for code generation, but it's not great for source verbosity. ~Andrew
On 14.03.2025 10:30, Andrew Cooper wrote: > On 14/03/2025 8:56 am, Jan Beulich wrote: >> On 14.03.2025 09:07, Andriy Sultanov wrote: >>> On Thu, 13 Mar 2025 at 19:59, Jason Andryuk <jason.andryuk@amd.com> wrote: >>>> On 2025-03-13 14:57, Andrii Sultanov wrote: >>>>> --- a/xen/drivers/passthrough/amd/iommu.h >>>>> +++ b/xen/drivers/passthrough/amd/iommu.h >>>>> @@ -77,8 +77,14 @@ struct amd_iommu { >>>>> struct list_head list; >>>>> spinlock_t lock; /* protect iommu */ >>>>> >>>>> - u16 seg; >>>>> - u16 bdf; >>>>> + union { >>>>> + struct { >>>>> + uint16_t bdf; >>>>> + uint16_t seg; >>>> Are these still needed by the end of this patch? >>> Yes - otherwise the patch would be larger as bdf and seg would be one >>> namespace deeper - /iommu->seg/iommu->sbdf.seg/ >> This kind of union is fragile. Hence we want to avoid it, even if this means >> an overall larger diff. > > This is my suggestion, and it's the pattern used in struct pci_dev. And I'm hoping to eliminate it there, too, at some point. But adding a hidden dependency on the layout in an entirely different part of the tree just cannot do us any good. > pci_sbdf_t is nice for code generation, but it's not great for source > verbosity. I agree, yet if anything we'd need a global approach to deal with that aspect. Jan
diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h index 00e81b4b2a..6903b1bc5d 100644 --- a/xen/drivers/passthrough/amd/iommu.h +++ b/xen/drivers/passthrough/amd/iommu.h @@ -77,8 +77,14 @@ struct amd_iommu { struct list_head list; spinlock_t lock; /* protect iommu */ - u16 seg; - u16 bdf; + union { + struct { + uint16_t bdf; + uint16_t seg; + }; + pci_sbdf_t sbdf; + }; + struct msi_desc msi; u16 cap_offset; @@ -240,7 +246,7 @@ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf); void amd_iommu_flush_all_caches(struct amd_iommu *iommu); /* find iommu for bdf */ -struct amd_iommu *find_iommu_for_device(int seg, int bdf); +struct amd_iommu *find_iommu_for_device(pci_sbdf_t sbdf); /* interrupt remapping */ bool cf_check iov_supports_xt(void); @@ -262,7 +268,13 @@ int cf_check amd_setup_hpet_msi(struct msi_desc *msi_desc); void cf_check amd_iommu_dump_intremap_tables(unsigned char key); extern struct ioapic_sbdf { - u16 bdf, seg; + union { + struct { + uint16_t bdf; + uint16_t seg; + }; + pci_sbdf_t sbdf; + }; u8 id; bool cmdline; u16 *pin_2_idx; @@ -273,7 +285,14 @@ unsigned int ioapic_id_to_index(unsigned int apic_id); unsigned int get_next_ioapic_sbdf_index(void); extern struct hpet_sbdf { - u16 bdf, seg, id; + union { + struct { + uint16_t bdf; + uint16_t seg; + }; + pci_sbdf_t sbdf; + }; + uint16_t id; enum { HPET_NONE, HPET_CMDL, diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c index 5bdbfb5ba8..57efb7ddda 100644 --- a/xen/drivers/passthrough/amd/iommu_acpi.c +++ b/xen/drivers/passthrough/amd/iommu_acpi.c @@ -107,12 +107,12 @@ static void __init add_ivrs_mapping_entry( } static struct amd_iommu * __init find_iommu_from_bdf_cap( - u16 seg, u16 bdf, u16 cap_offset) + pci_sbdf_t sbdf, u16 cap_offset) { struct amd_iommu *iommu; for_each_amd_iommu ( iommu ) - if ( (iommu->seg == seg) && (iommu->bdf == bdf) && + if ( (iommu->sbdf.sbdf == sbdf.sbdf) && (iommu->cap_offset == cap_offset) ) return iommu; @@ -239,17 +239,17 @@ static int __init register_range_for_device( unsigned int bdf, paddr_t base, paddr_t limit, bool iw, bool ir, bool exclusion) { - int seg = 0; /* XXX */ - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); + pci_sbdf_t sbdf = { .seg = 0, .bdf = bdf }; + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); struct amd_iommu *iommu; u16 req; int rc = 0; - iommu = find_iommu_for_device(seg, bdf); + iommu = find_iommu_for_device(sbdf); if ( !iommu ) { AMD_IOMMU_WARN("IVMD: no IOMMU for device %pp - ignoring constrain\n", - &PCI_SBDF(seg, bdf)); + &(sbdf)); return 0; } req = ivrs_mappings[bdf].dte_requestor_id; @@ -263,9 +263,9 @@ static int __init register_range_for_device( paddr_t length = limit + PAGE_SIZE - base; /* reserve unity-mapped page entries for device */ - rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir, + rc = reserve_unity_map_for_device(sbdf.seg, bdf, base, length, iw, ir, false) ?: - reserve_unity_map_for_device(seg, req, base, length, iw, ir, + reserve_unity_map_for_device(sbdf.seg, req, base, length, iw, ir, false); } else @@ -297,7 +297,7 @@ static int __init register_range_for_iommu_devices( /* reserve unity-mapped page entries for devices */ for ( bdf = rc = 0; !rc && bdf < ivrs_bdf_entries; bdf++ ) { - if ( iommu != find_iommu_for_device(iommu->seg, bdf) ) + if ( iommu != find_iommu_for_device(iommu->sbdf) ) continue; req = get_ivrs_mappings(iommu->seg)[bdf].dte_requestor_id; @@ -362,7 +362,7 @@ static int __init parse_ivmd_device_iommu( struct amd_iommu *iommu; /* find target IOMMU */ - iommu = find_iommu_from_bdf_cap(seg, ivmd_block->header.device_id, + iommu = find_iommu_from_bdf_cap(PCI_SBDF(seg, ivmd_block->header.device_id), ivmd_block->aux_data); if ( !iommu ) { @@ -916,8 +916,8 @@ static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block) ivhd_block->pci_segment_group, ivhd_block->info, ivhd_block->iommu_attr); - iommu = find_iommu_from_bdf_cap(ivhd_block->pci_segment_group, - ivhd_block->header.device_id, + iommu = find_iommu_from_bdf_cap(PCI_SBDF(ivhd_block->pci_segment_group, + ivhd_block->header.device_id), ivhd_block->capability_offset); if ( !iommu ) { diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c index 83c525b84f..dc3d2394a1 100644 --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -85,7 +85,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, threshold |= threshold << 1; printk(XENLOG_WARNING "AMD IOMMU %pp: %scompletion wait taking too long\n", - &PCI_SBDF(iommu->seg, iommu->bdf), + &(iommu->sbdf), timeout_base ? "iotlb " : ""); timeout = 0; } @@ -95,7 +95,7 @@ static void flush_command_buffer(struct amd_iommu *iommu, if ( !timeout ) printk(XENLOG_WARNING "AMD IOMMU %pp: %scompletion wait took %lums\n", - &PCI_SBDF(iommu->seg, iommu->bdf), + &(iommu->sbdf), timeout_base ? "iotlb " : "", (NOW() - start) / 10000000); } @@ -288,7 +288,7 @@ void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) ) return; - iommu = find_iommu_for_device(pdev->seg, pdev->sbdf.bdf); + iommu = find_iommu_for_device(pdev->sbdf); if ( !iommu ) { diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c index cede44e651..7d60389500 100644 --- a/xen/drivers/passthrough/amd/iommu_detect.c +++ b/xen/drivers/passthrough/amd/iommu_detect.c @@ -231,7 +231,7 @@ int __init amd_iommu_detect_one_acpi( rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func)); if ( rt ) printk(XENLOG_ERR "Could not mark config space of %pp read-only (%d)\n", - &PCI_SBDF(iommu->seg, iommu->bdf), rt); + &(iommu->sbdf), rt); list_add_tail(&iommu->list, &amd_iommu_head); rt = 0; diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index bb25b55c85..e2c205a857 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -409,9 +409,7 @@ static void iommu_reset_log(struct amd_iommu *iommu, static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag) { - pci_sbdf_t sbdf = { .seg = iommu->seg, .bdf = iommu->bdf }; - - __msi_set_enable(sbdf, iommu->msi.msi_attrib.pos, flag); + __msi_set_enable(iommu->sbdf, iommu->msi.msi_attrib.pos, flag); } static void cf_check iommu_msi_unmask(struct irq_desc *desc) @@ -752,12 +750,11 @@ static bool __init set_iommu_interrupt_handler(struct amd_iommu *iommu) } pcidevs_lock(); - iommu->msi.dev = pci_get_pdev(NULL, PCI_SBDF(iommu->seg, iommu->bdf)); + iommu->msi.dev = pci_get_pdev(NULL, iommu->sbdf); pcidevs_unlock(); if ( !iommu->msi.dev ) { - AMD_IOMMU_WARN("no pdev for %pp\n", - &PCI_SBDF(iommu->seg, iommu->bdf)); + AMD_IOMMU_WARN("no pdev for %pp\n", &(iommu->sbdf)); return 0; } @@ -779,7 +776,7 @@ static bool __init set_iommu_interrupt_handler(struct amd_iommu *iommu) hw_irq_controller *handler; u16 control; - control = pci_conf_read16(PCI_SBDF(iommu->seg, iommu->bdf), + control = pci_conf_read16(iommu->sbdf, iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS); iommu->msi.msi.nvec = 1; @@ -843,22 +840,22 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) (boot_cpu_data.x86_model > 0x1f) ) return; - pci_conf_write32(PCI_SBDF(iommu->seg, iommu->bdf), 0xf0, 0x90); - value = pci_conf_read32(PCI_SBDF(iommu->seg, iommu->bdf), 0xf4); + pci_conf_write32(iommu->sbdf, 0xf0, 0x90); + value = pci_conf_read32(iommu->sbdf, 0xf4); if ( value & (1 << 2) ) return; /* Select NB indirect register 0x90 and enable writing */ - pci_conf_write32(PCI_SBDF(iommu->seg, iommu->bdf), 0xf0, 0x90 | (1 << 8)); + pci_conf_write32(iommu->sbdf, 0xf0, 0x90 | (1 << 8)); - pci_conf_write32(PCI_SBDF(iommu->seg, iommu->bdf), 0xf4, value | (1 << 2)); + pci_conf_write32(iommu->sbdf, 0xf4, value | (1 << 2)); printk(XENLOG_INFO "AMD-Vi: Applying erratum 746 workaround for IOMMU at %pp\n", - &PCI_SBDF(iommu->seg, iommu->bdf)); + &iommu->sbdf); /* Clear the enable writing bit */ - pci_conf_write32(PCI_SBDF(iommu->seg, iommu->bdf), 0xf0, 0x90); + pci_conf_write32(iommu->sbdf, 0xf0, 0x90); } static void enable_iommu(struct amd_iommu *iommu) @@ -1543,14 +1540,14 @@ static void invalidate_all_domain_pages(void) static int cf_check _invalidate_all_devices( u16 seg, struct ivrs_mappings *ivrs_mappings) { - unsigned int bdf; + pci_sbdf_t sbdf = { .seg = seg, .bdf = 0 }; u16 req_id; struct amd_iommu *iommu; - for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; sbdf.bdf++ ) { - iommu = find_iommu_for_device(seg, bdf); - req_id = ivrs_mappings[bdf].dte_requestor_id; + iommu = find_iommu_for_device(sbdf); + req_id = ivrs_mappings[sbdf.bdf].dte_requestor_id; if ( iommu ) { /* diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c index 9abdc38053..0c91125ec0 100644 --- a/xen/drivers/passthrough/amd/iommu_intr.c +++ b/xen/drivers/passthrough/amd/iommu_intr.c @@ -123,10 +123,10 @@ static spinlock_t* get_intremap_lock(int seg, int req_id) &shared_intremap_lock); } -static int get_intremap_requestor_id(int seg, int bdf) +static int get_intremap_requestor_id(pci_sbdf_t sbdf) { - ASSERT( bdf < ivrs_bdf_entries ); - return get_ivrs_mappings(seg)[bdf].dte_requestor_id; + ASSERT( sbdf.bdf < ivrs_bdf_entries ); + return get_ivrs_mappings(sbdf.seg)[sbdf.bdf].dte_requestor_id; } static unsigned int alloc_intremap_entry(const struct amd_iommu *iommu, @@ -281,7 +281,7 @@ static int update_intremap_entry_from_ioapic( unsigned int dest, offset; bool fresh = false; - req_id = get_intremap_requestor_id(iommu->seg, bdf); + req_id = get_intremap_requestor_id(PCI_SBDF(iommu->seg, bdf)); lock = get_intremap_lock(iommu->seg, req_id); delivery_mode = rte->delivery_mode; @@ -323,7 +323,8 @@ void cf_check amd_iommu_ioapic_update_ire( unsigned int apic, unsigned int pin, uint64_t rte) { struct IO_APIC_route_entry new_rte; - int seg, bdf, rc; + pci_sbdf_t sbdf; + int rc; struct amd_iommu *iommu; unsigned int idx; @@ -335,20 +336,19 @@ void cf_check amd_iommu_ioapic_update_ire( new_rte.raw = rte; /* get device id of ioapic devices */ - bdf = ioapic_sbdf[idx].bdf; - seg = ioapic_sbdf[idx].seg; - iommu = find_iommu_for_device(seg, bdf); + sbdf.sbdf = ioapic_sbdf[idx].sbdf.sbdf; + iommu = find_iommu_for_device(sbdf); if ( !iommu ) { AMD_IOMMU_WARN("failed to find IOMMU for IO-APIC @ %04x:%04x\n", - seg, bdf); + sbdf.seg, sbdf.bdf); __ioapic_write_entry(apic, pin, true, new_rte); return; } /* Update interrupt remapping entry */ rc = update_intremap_entry_from_ioapic( - bdf, iommu, &new_rte, + sbdf.bdf, iommu, &new_rte, &ioapic_sbdf[idx].pin_2_idx[pin]); if ( rc ) @@ -369,7 +369,8 @@ unsigned int cf_check amd_iommu_read_ioapic_from_ire( unsigned int offset; unsigned int val = __io_apic_read(apic, reg); unsigned int pin = (reg - 0x10) / 2; - uint16_t seg, bdf, req_id; + pci_sbdf_t sbdf; + uint16_t req_id; const struct amd_iommu *iommu; union irte_ptr entry; @@ -381,12 +382,11 @@ unsigned int cf_check amd_iommu_read_ioapic_from_ire( if ( offset >= INTREMAP_MAX_ENTRIES ) return val; - seg = ioapic_sbdf[idx].seg; - bdf = ioapic_sbdf[idx].bdf; - iommu = find_iommu_for_device(seg, bdf); + sbdf.sbdf = ioapic_sbdf[idx].sbdf.sbdf; + iommu = find_iommu_for_device(sbdf); if ( !iommu ) return val; - req_id = get_intremap_requestor_id(seg, bdf); + req_id = get_intremap_requestor_id(sbdf); entry = get_intremap_entry(iommu, req_id, offset); if ( !(reg & 1) ) @@ -420,7 +420,7 @@ static int update_intremap_entry_from_msi_msg( bool fresh = false; req_id = get_dma_requestor_id(iommu->seg, bdf); - alias_id = get_intremap_requestor_id(iommu->seg, bdf); + alias_id = get_intremap_requestor_id(PCI_SBDF(iommu->seg, bdf)); lock = get_intremap_lock(iommu->seg, req_id); spin_lock_irqsave(lock, flags); @@ -495,19 +495,19 @@ static int update_intremap_entry_from_msi_msg( return fresh; } -static struct amd_iommu *_find_iommu_for_device(int seg, int bdf) +static struct amd_iommu *_find_iommu_for_device(pci_sbdf_t sbdf) { struct amd_iommu *iommu; for_each_amd_iommu ( iommu ) - if ( iommu->seg == seg && iommu->bdf == bdf ) + if ( iommu->sbdf.sbdf == sbdf.sbdf ) return NULL; - iommu = find_iommu_for_device(seg, bdf); + iommu = find_iommu_for_device(sbdf); if ( iommu ) return iommu; - AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %pp\n", &PCI_SBDF(seg, bdf)); + AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %pp\n", &sbdf); return ERR_PTR(-EINVAL); } @@ -515,15 +515,15 @@ int cf_check amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { struct pci_dev *pdev = msi_desc->dev; - int bdf, seg, rc; + pci_sbdf_t sbdf; + int rc; struct amd_iommu *iommu; unsigned int i, nr = 1; u32 data; - bdf = pdev ? pdev->sbdf.bdf : hpet_sbdf.bdf; - seg = pdev ? pdev->seg : hpet_sbdf.seg; + sbdf.sbdf = pdev ? pdev->sbdf.sbdf : hpet_sbdf.sbdf.sbdf; - iommu = _find_iommu_for_device(seg, bdf); + iommu = _find_iommu_for_device(sbdf); if ( IS_ERR_OR_NULL(iommu) ) return PTR_ERR(iommu); @@ -532,7 +532,7 @@ int cf_check amd_iommu_msi_msg_update_ire( if ( msi_desc->remap_index >= 0 && !msg ) { - update_intremap_entry_from_msi_msg(iommu, bdf, nr, + update_intremap_entry_from_msi_msg(iommu, sbdf.bdf, nr, &msi_desc->remap_index, NULL, NULL); @@ -543,7 +543,7 @@ int cf_check amd_iommu_msi_msg_update_ire( if ( !msg ) return 0; - rc = update_intremap_entry_from_msi_msg(iommu, bdf, nr, + rc = update_intremap_entry_from_msi_msg(iommu, sbdf.bdf, nr, &msi_desc->remap_index, msg, &data); if ( rc > 0 ) @@ -660,8 +660,7 @@ bool __init cf_check iov_supports_xt(void) if ( idx == MAX_IO_APICS ) return false; - if ( !find_iommu_for_device(ioapic_sbdf[idx].seg, - ioapic_sbdf[idx].bdf) ) + if ( !find_iommu_for_device(ioapic_sbdf[idx].sbdf) ) { AMD_IOMMU_WARN("no IOMMU for IO-APIC %#x (ID %x)\n", apic, IO_APIC_ID(apic)); @@ -690,7 +689,7 @@ int __init cf_check amd_setup_hpet_msi(struct msi_desc *msi_desc) return -ENODEV; } - iommu = find_iommu_for_device(hpet_sbdf.seg, hpet_sbdf.bdf); + iommu = find_iommu_for_device(hpet_sbdf.sbdf); if ( !iommu ) return -ENXIO; diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c index dde393645a..17070904fa 100644 --- a/xen/drivers/passthrough/amd/iommu_map.c +++ b/xen/drivers/passthrough/amd/iommu_map.c @@ -694,17 +694,16 @@ int amd_iommu_reserve_domain_unity_unmap(struct domain *d, int cf_check amd_iommu_get_reserved_device_memory( iommu_grdm_t *func, void *ctxt) { - unsigned int seg = 0 /* XXX */, bdf; - const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); + pci_sbdf_t sbdf = {0}; + const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); /* At least for global entries, avoid reporting them multiple times. */ enum { pending, processing, done } global = pending; - for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) + for ( /* sbdf.bdf = 0 */ ; sbdf.bdf < ivrs_bdf_entries; ++sbdf.bdf ) { - pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); - const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map; - unsigned int req = ivrs_mappings[bdf].dte_requestor_id; - const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu; + const struct ivrs_unity_map *um = ivrs_mappings[sbdf.bdf].unity_map; + unsigned int req = ivrs_mappings[sbdf.bdf].dte_requestor_id; + const struct amd_iommu *iommu = ivrs_mappings[sbdf.bdf].iommu; int rc; if ( !iommu ) @@ -717,7 +716,7 @@ int cf_check amd_iommu_get_reserved_device_memory( pcidevs_unlock(); if ( pdev ) - iommu = find_iommu_for_device(seg, bdf); + iommu = find_iommu_for_device(sbdf); if ( !iommu ) continue; } @@ -729,8 +728,8 @@ int cf_check amd_iommu_get_reserved_device_memory( * multiple times the same range(s) for perhaps many devices with * the same alias ID. */ - if ( bdf != req && ivrs_mappings[req].iommu && - func(0, 0, PCI_SBDF(seg, req).sbdf, ctxt) ) + if ( sbdf.bdf != req && ivrs_mappings[req].iommu && + func(0, 0, sbdf.sbdf, ctxt) ) continue; if ( global == pending ) @@ -740,7 +739,7 @@ int cf_check amd_iommu_get_reserved_device_memory( if ( iommu->exclusion_enable && (iommu->exclusion_allow_all ? global == processing : - ivrs_mappings[bdf].dte_allow_exclusion) ) + ivrs_mappings[sbdf.bdf].dte_allow_exclusion) ) { rc = func(PFN_DOWN(iommu->exclusion_base), PFN_UP(iommu->exclusion_limit | 1) - diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c index d00697edb3..16bab0f948 100644 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -32,35 +32,35 @@ static bool __read_mostly init_done; static const struct iommu_init_ops _iommu_init_ops; -struct amd_iommu *find_iommu_for_device(int seg, int bdf) +struct amd_iommu *find_iommu_for_device(pci_sbdf_t sbdf) { - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(sbdf.seg); - if ( !ivrs_mappings || bdf >= ivrs_bdf_entries ) + if ( !ivrs_mappings || sbdf.bdf >= ivrs_bdf_entries ) return NULL; - if ( unlikely(!ivrs_mappings[bdf].iommu) && likely(init_done) ) + if ( unlikely(!ivrs_mappings[sbdf.bdf].iommu) && likely(init_done) ) { - unsigned int bd0 = bdf & ~PCI_FUNC(~0); + unsigned int bd0 = sbdf.bdf & ~PCI_FUNC(~0); - if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != bdf ) + if ( ivrs_mappings[bd0].iommu && ivrs_mappings[bd0].iommu->bdf != sbdf.bdf ) { struct ivrs_mappings tmp = ivrs_mappings[bd0]; tmp.iommu = NULL; if ( tmp.dte_requestor_id == bd0 ) - tmp.dte_requestor_id = bdf; - ivrs_mappings[bdf] = tmp; + tmp.dte_requestor_id = sbdf.bdf; + ivrs_mappings[sbdf.bdf] = tmp; printk(XENLOG_WARNING "%pp not found in ACPI tables;" - " using same IOMMU as function 0\n", &PCI_SBDF(seg, bdf)); + " using same IOMMU as function 0\n", &sbdf); /* write iommu field last */ - ivrs_mappings[bdf].iommu = ivrs_mappings[bd0].iommu; + ivrs_mappings[sbdf.bdf].iommu = ivrs_mappings[bd0].iommu; } } - return ivrs_mappings[bdf].iommu; + return ivrs_mappings[sbdf.bdf].iommu; } /* @@ -107,7 +107,7 @@ static bool any_pdev_behind_iommu(const struct domain *d, if ( pdev == exclude ) continue; - if ( find_iommu_for_device(pdev->seg, pdev->sbdf.bdf) == iommu ) + if ( find_iommu_for_device(pdev->sbdf) == iommu ) return true; } @@ -468,7 +468,7 @@ static int cf_check reassign_device( struct amd_iommu *iommu; int rc; - iommu = find_iommu_for_device(pdev->seg, pdev->sbdf.bdf); + iommu = find_iommu_for_device(pdev->sbdf); if ( !iommu ) { AMD_IOMMU_WARN("failed to find IOMMU: %pp cannot be assigned to %pd\n", @@ -578,10 +578,10 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) return -EINVAL; for_each_amd_iommu(iommu) - if ( pdev->seg == iommu->seg && pdev->sbdf.bdf == iommu->bdf ) + if ( pdev->sbdf.sbdf == iommu->sbdf.sbdf ) return is_hardware_domain(pdev->domain) ? 0 : -ENODEV; - iommu = find_iommu_for_device(pdev->seg, pdev->sbdf.bdf); + iommu = find_iommu_for_device(pdev->sbdf); if ( unlikely(!iommu) ) { /* Filter bridge devices. */ @@ -666,7 +666,7 @@ static int cf_check amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev) if ( !pdev->domain ) return -EINVAL; - iommu = find_iommu_for_device(pdev->seg, pdev->sbdf.bdf); + iommu = find_iommu_for_device(pdev->sbdf); if ( !iommu ) { AMD_IOMMU_WARN("failed to find IOMMU: %pp cannot be removed from %pd\n",
Following on from 250d87dc, struct amd_iommu has its seg and bdf fields backwards with relation to pci_sbdf_t. Swap them around, and simplify the expressions regenerating an sbdf_t from seg+bdf. Simplify ioapic_sbdf and bpet_sbdf along the way. Adjust functions taking seg and bdf fields of these structs to take pci_sbdf_t instead. Simplify comparisons similarly. Bloat-o-meter reports: ``` add/remove: 0/0 grow/shrink: 13/21 up/down: 352/-576 (-224) Function old new delta _einittext 22028 22220 +192 amd_iommu_prepare 853 897 +44 _invalidate_all_devices 133 154 +21 amd_iommu_domain_destroy 46 63 +17 disable_fmt 12336 12352 +16 _acpi_module_name 416 432 +16 amd_iommu_get_reserved_device_memory 521 536 +15 parse_ivrs_table 3955 3966 +11 amd_iommu_assign_device 271 282 +11 find_iommu_for_device 242 246 +4 get_intremap_requestor_id 49 52 +3 amd_iommu_free_intremap_table 360 361 +1 allocate_domain_resources 82 83 +1 reassign_device 843 838 -5 amd_iommu_remove_device 352 347 -5 amd_iommu_flush_iotlb 359 354 -5 iov_supports_xt 270 264 -6 amd_iommu_setup_domain_device 1478 1472 -6 amd_setup_hpet_msi 232 224 -8 amd_iommu_ioapic_update_ire 572 564 -8 _hvm_dpci_msi_eoi 157 149 -8 amd_iommu_msi_enable 33 20 -13 register_range_for_device 297 281 -16 amd_iommu_add_device 856 839 -17 update_intremap_entry_from_msi_msg 879 861 -18 amd_iommu_read_ioapic_from_ire 347 323 -24 amd_iommu_msi_msg_update_ire 472 431 -41 flush_command_buffer 460 417 -43 set_iommu_interrupt_handler 421 377 -44 amd_iommu_detect_one_acpi 918 868 -50 amd_iommu_get_supported_ivhd_type 86 31 -55 iterate_ivrs_mappings 169 113 -56 parse_ivmd_block 1339 1271 -68 enable_iommu 1745 1665 -80 ``` Resolves: https://gitlab.com/xen-project/xen/-/issues/198 Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Andrii Sultanov <sultanovandriy@gmail.com> --- xen/drivers/passthrough/amd/iommu.h | 29 +++++++++-- xen/drivers/passthrough/amd/iommu_acpi.c | 24 ++++----- xen/drivers/passthrough/amd/iommu_cmd.c | 6 +-- xen/drivers/passthrough/amd/iommu_detect.c | 2 +- xen/drivers/passthrough/amd/iommu_init.c | 31 +++++------ xen/drivers/passthrough/amd/iommu_intr.c | 57 ++++++++++----------- xen/drivers/passthrough/amd/iommu_map.c | 21 ++++---- xen/drivers/passthrough/amd/pci_amd_iommu.c | 32 ++++++------ 8 files changed, 108 insertions(+), 94 deletions(-)