Message ID | 3dd8c36177ac52d9a87655badb000d11785a5a4a.1564702313.git.sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | Fix PF/VF dependency issue | expand |
On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > Currently, PRI Capability checks are repeated across all PRI API's. > Instead, cache the capability check result in pci_pri_init() and use it > in other PRI API's. Also, since PRI is a shared resource between PF/VF, > initialize default values for common PRI features in pci_pri_init(). This patch does two things, and it would be better if they were split: 1) Cache the PRI capability offset 2) Separate the PF and VF paths > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- > include/linux/pci-ats.h | 5 +++ > include/linux/pci.h | 1 + > 3 files changed, 61 insertions(+), 25 deletions(-) > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c > index cdd936d10f68..280be911f190 100644 > --- a/drivers/pci/ats.c > +++ b/drivers/pci/ats.c > @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) > return; > > dev->ats_cap = pos; > + > + pci_pri_init(dev); > } > > /** > @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) > EXPORT_SYMBOL_GPL(pci_ats_page_aligned); > > #ifdef CONFIG_PCI_PRI > + > +void pci_pri_init(struct pci_dev *pdev) > +{ > + u32 max_requests; > + int pos; > + > + /* > + * As per PCIe r4.0, sec 9.3.7.11, only PF is permitted to > + * implement PRI and all associated VFs can only use it. > + * Since PF already initialized the PRI parameters there is > + * no need to proceed further. > + */ > + if (pdev->is_virtfn) > + return; > + > + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > + if (!pos) > + return; > + > + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); > + > + /* > + * Since PRI is a shared resource between PF and VF, we must not > + * configure Outstanding Page Allocation Quota as a per device > + * resource in pci_enable_pri(). So use maximum value possible > + * as default value. > + */ > + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, max_requests); > + > + pdev->pri_reqs_alloc = max_requests; > + pdev->pri_cap = pos; > +} > + > /** > * pci_enable_pri - Enable PRI capability > * @ pdev: PCI device structure > * > * Returns 0 on success, negative value on error > + * > + * TODO: Since PRI is a shared resource between PF/VF, don't update > + * Outstanding Page Allocation Quota in the same API as a per device > + * feature. > */ > int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > { > u16 control, status; > u32 max_requests; > - int pos; > > if (WARN_ON(pdev->pri_enabled)) > return -EBUSY; > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > - if (!pos) > + if (!pdev->pri_cap) > return -EINVAL; > > - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); > if (!(status & PCI_PRI_STATUS_STOPPED)) > return -EBUSY; > > - pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); > + pci_read_config_dword(pdev, pdev->pri_cap + PCI_PRI_MAX_REQ, > + &max_requests); > reqs = min(max_requests, reqs); > pdev->pri_reqs_alloc = reqs; > - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); > + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); The comment above says "don't update Outstanding Page Allocation Quota" but it looks like that's what this is doing. > control = PCI_PRI_CTRL_ENABLE; > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > pdev->pri_enabled = 1; > > @@ -216,18 +254,16 @@ EXPORT_SYMBOL_GPL(pci_enable_pri); > void pci_disable_pri(struct pci_dev *pdev) > { > u16 control; > - int pos; > > if (WARN_ON(!pdev->pri_enabled)) > return; > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > - if (!pos) > + if (!pdev->pri_cap) > return; > > - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, &control); > control &= ~PCI_PRI_CTRL_ENABLE; > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > pdev->pri_enabled = 0; > } > @@ -241,17 +277,15 @@ void pci_restore_pri_state(struct pci_dev *pdev) > { > u16 control = PCI_PRI_CTRL_ENABLE; > u32 reqs = pdev->pri_reqs_alloc; > - int pos; > > if (!pdev->pri_enabled) > return; > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > - if (!pos) > + if (!pdev->pri_cap) > return; > > - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > } > EXPORT_SYMBOL_GPL(pci_restore_pri_state); > > @@ -265,17 +299,15 @@ EXPORT_SYMBOL_GPL(pci_restore_pri_state); > int pci_reset_pri(struct pci_dev *pdev) > { > u16 control; > - int pos; > > if (WARN_ON(pdev->pri_enabled)) > return -EBUSY; > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > - if (!pos) > + if (!pdev->pri_cap) > return -EINVAL; > > control = PCI_PRI_CTRL_RESET; > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > return 0; > } > @@ -410,13 +442,11 @@ EXPORT_SYMBOL_GPL(pci_pasid_features); > int pci_prg_resp_pasid_required(struct pci_dev *pdev) > { > u16 status; > - int pos; > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > - if (!pos) > + if (!pdev->pri_cap) > return 0; > > - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); > > if (status & PCI_PRI_STATUS_PASID) > return 1; > diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h > index 1a0bdaee2f32..33653d4ca94f 100644 > --- a/include/linux/pci-ats.h > +++ b/include/linux/pci-ats.h > @@ -6,6 +6,7 @@ > > #ifdef CONFIG_PCI_PRI > > +void pci_pri_init(struct pci_dev *pdev); I think this could be moved to drivers/pci/pci.h, since it doesn't need to be visible outside drivers/pci/. > int pci_enable_pri(struct pci_dev *pdev, u32 reqs); > void pci_disable_pri(struct pci_dev *pdev); > void pci_restore_pri_state(struct pci_dev *pdev); > @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); > > #else /* CONFIG_PCI_PRI */ > > +static inline void pci_pri_init(struct pci_dev *pdev) > +{ > +} > + > static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > { > return -ENODEV; > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 9e700d9f9f28..56b55db099fc 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -455,6 +455,7 @@ struct pci_dev { > atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ > #endif > #ifdef CONFIG_PCI_PRI > + u16 pri_cap; /* PRI Capability offset */ > u32 pri_reqs_alloc; /* Number of PRI requests allocated */ > #endif > #ifdef CONFIG_PCI_PASID > -- > 2.21.0 >
Hi, Thanks for the review. On 8/12/19 1:04 PM, Bjorn Helgaas wrote: > On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: >> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> >> Currently, PRI Capability checks are repeated across all PRI API's. >> Instead, cache the capability check result in pci_pri_init() and use it >> in other PRI API's. Also, since PRI is a shared resource between PF/VF, >> initialize default values for common PRI features in pci_pri_init(). > This patch does two things, and it would be better if they were split: > > 1) Cache the PRI capability offset > 2) Separate the PF and VF paths Ok. I will split it into two patches in next version. > >> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> --- >> drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- >> include/linux/pci-ats.h | 5 +++ >> include/linux/pci.h | 1 + >> 3 files changed, 61 insertions(+), 25 deletions(-) >> >> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c >> index cdd936d10f68..280be911f190 100644 >> --- a/drivers/pci/ats.c >> +++ b/drivers/pci/ats.c >> @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) >> return; >> >> dev->ats_cap = pos; >> + >> + pci_pri_init(dev); >> } >> >> /** >> @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) >> EXPORT_SYMBOL_GPL(pci_ats_page_aligned); >> >> #ifdef CONFIG_PCI_PRI >> + >> +void pci_pri_init(struct pci_dev *pdev) >> +{ >> + u32 max_requests; >> + int pos; >> + >> + /* >> + * As per PCIe r4.0, sec 9.3.7.11, only PF is permitted to >> + * implement PRI and all associated VFs can only use it. >> + * Since PF already initialized the PRI parameters there is >> + * no need to proceed further. >> + */ >> + if (pdev->is_virtfn) >> + return; >> + >> + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> + if (!pos) >> + return; >> + >> + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); >> + >> + /* >> + * Since PRI is a shared resource between PF and VF, we must not >> + * configure Outstanding Page Allocation Quota as a per device >> + * resource in pci_enable_pri(). So use maximum value possible >> + * as default value. >> + */ >> + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, max_requests); >> + >> + pdev->pri_reqs_alloc = max_requests; >> + pdev->pri_cap = pos; >> +} >> + >> /** >> * pci_enable_pri - Enable PRI capability >> * @ pdev: PCI device structure >> * >> * Returns 0 on success, negative value on error >> + * >> + * TODO: Since PRI is a shared resource between PF/VF, don't update >> + * Outstanding Page Allocation Quota in the same API as a per device >> + * feature. >> */ >> int pci_enable_pri(struct pci_dev *pdev, u32 reqs) >> { >> u16 control, status; >> u32 max_requests; >> - int pos; >> >> if (WARN_ON(pdev->pri_enabled)) >> return -EBUSY; >> >> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> - if (!pos) >> + if (!pdev->pri_cap) >> return -EINVAL; >> >> - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); >> + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); >> if (!(status & PCI_PRI_STATUS_STOPPED)) >> return -EBUSY; >> >> - pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); >> + pci_read_config_dword(pdev, pdev->pri_cap + PCI_PRI_MAX_REQ, >> + &max_requests); >> reqs = min(max_requests, reqs); >> pdev->pri_reqs_alloc = reqs; >> - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); >> + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); > The comment above says "don't update Outstanding Page Allocation > Quota" but it looks like that's what this is doing. I don't want to fix it in the current patch-set. It needs further scrutiny. That's why I have added the TODO comment for it. Currently, intel-iommu and amd-iommu drivers (only users of pci_enable_pri()) hard-codes 32 as a default value for Outstanding Page Allocation Quota. Only exception is, amd-iommu sets this value as 1 for devices with erratum AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE. There is no comment or spec reference that explains why 32 is chosen as default value. Also configuring 32 as per device max value will break for PF/VF devices since they share the PRI interface. So without clear history, I don't want to make any changes which might affect their functionality. IMO, the correct way is to configure the Outstanding Page Allocation Quota with maximum value in pci_pri_init(). So, even if IOMMU can't handle more than 32 page request per device, it can fail properly and it should not affect the functionality. I have added proper configuration for Outstanding Page Allocation Quota in pci_pri_init(), but it does not serve any purpose until we fix the part of the issue in pci_enable_pri(). If you want, I can remove it for now, and add it when fixing the issue in pci_enable_pri(). > >> control = PCI_PRI_CTRL_ENABLE; >> - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); >> + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); >> >> pdev->pri_enabled = 1; >> >> @@ -216,18 +254,16 @@ EXPORT_SYMBOL_GPL(pci_enable_pri); >> void pci_disable_pri(struct pci_dev *pdev) >> { >> u16 control; >> - int pos; >> >> if (WARN_ON(!pdev->pri_enabled)) >> return; >> >> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> - if (!pos) >> + if (!pdev->pri_cap) >> return; >> >> - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); >> + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, &control); >> control &= ~PCI_PRI_CTRL_ENABLE; >> - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); >> + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); >> >> pdev->pri_enabled = 0; >> } >> @@ -241,17 +277,15 @@ void pci_restore_pri_state(struct pci_dev *pdev) >> { >> u16 control = PCI_PRI_CTRL_ENABLE; >> u32 reqs = pdev->pri_reqs_alloc; >> - int pos; >> >> if (!pdev->pri_enabled) >> return; >> >> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> - if (!pos) >> + if (!pdev->pri_cap) >> return; >> >> - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); >> - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); >> + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); >> + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); >> } >> EXPORT_SYMBOL_GPL(pci_restore_pri_state); >> >> @@ -265,17 +299,15 @@ EXPORT_SYMBOL_GPL(pci_restore_pri_state); >> int pci_reset_pri(struct pci_dev *pdev) >> { >> u16 control; >> - int pos; >> >> if (WARN_ON(pdev->pri_enabled)) >> return -EBUSY; >> >> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> - if (!pos) >> + if (!pdev->pri_cap) >> return -EINVAL; >> >> control = PCI_PRI_CTRL_RESET; >> - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); >> + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); >> >> return 0; >> } >> @@ -410,13 +442,11 @@ EXPORT_SYMBOL_GPL(pci_pasid_features); >> int pci_prg_resp_pasid_required(struct pci_dev *pdev) >> { >> u16 status; >> - int pos; >> >> - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); >> - if (!pos) >> + if (!pdev->pri_cap) >> return 0; >> >> - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); >> + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); >> >> if (status & PCI_PRI_STATUS_PASID) >> return 1; >> diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h >> index 1a0bdaee2f32..33653d4ca94f 100644 >> --- a/include/linux/pci-ats.h >> +++ b/include/linux/pci-ats.h >> @@ -6,6 +6,7 @@ >> >> #ifdef CONFIG_PCI_PRI >> >> +void pci_pri_init(struct pci_dev *pdev); > I think this could be moved to drivers/pci/pci.h, since it doesn't > need to be visible outside drivers/pci/. Makes sense. I will move it. > >> int pci_enable_pri(struct pci_dev *pdev, u32 reqs); >> void pci_disable_pri(struct pci_dev *pdev); >> void pci_restore_pri_state(struct pci_dev *pdev); >> @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); >> >> #else /* CONFIG_PCI_PRI */ >> >> +static inline void pci_pri_init(struct pci_dev *pdev) >> +{ >> +} >> + >> static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) >> { >> return -ENODEV; >> diff --git a/include/linux/pci.h b/include/linux/pci.h >> index 9e700d9f9f28..56b55db099fc 100644 >> --- a/include/linux/pci.h >> +++ b/include/linux/pci.h >> @@ -455,6 +455,7 @@ struct pci_dev { >> atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ >> #endif >> #ifdef CONFIG_PCI_PRI >> + u16 pri_cap; /* PRI Capability offset */ >> u32 pri_reqs_alloc; /* Number of PRI requests allocated */ >> #endif >> #ifdef CONFIG_PCI_PASID >> -- >> 2.21.0 >>
On Mon, Aug 12, 2019 at 02:35:32PM -0700, sathyanarayanan kuppuswamy wrote: > On 8/12/19 1:04 PM, Bjorn Helgaas wrote: > > On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > > > Currently, PRI Capability checks are repeated across all PRI API's. > > > Instead, cache the capability check result in pci_pri_init() and use it > > > in other PRI API's. Also, since PRI is a shared resource between PF/VF, > > > initialize default values for common PRI features in pci_pri_init(). > > This patch does two things, and it would be better if they were split: > > > > 1) Cache the PRI capability offset > > 2) Separate the PF and VF paths > Ok. I will split it into two patches in next version. > > > > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > --- > > > drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- > > > include/linux/pci-ats.h | 5 +++ > > > include/linux/pci.h | 1 + > > > 3 files changed, 61 insertions(+), 25 deletions(-) > > > > > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c > > > index cdd936d10f68..280be911f190 100644 > > > --- a/drivers/pci/ats.c > > > +++ b/drivers/pci/ats.c > > > @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) > > > return; > > > dev->ats_cap = pos; > > > + > > > + pci_pri_init(dev); > > > } > > > /** > > > @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) > > > EXPORT_SYMBOL_GPL(pci_ats_page_aligned); > > > #ifdef CONFIG_PCI_PRI > > > + > > > +void pci_pri_init(struct pci_dev *pdev) > > > +{ > > > + u32 max_requests; > > > + int pos; > > > + > > > + /* > > > + * As per PCIe r4.0, sec 9.3.7.11, only PF is permitted to > > > + * implement PRI and all associated VFs can only use it. > > > + * Since PF already initialized the PRI parameters there is > > > + * no need to proceed further. > > > + */ > > > + if (pdev->is_virtfn) > > > + return; > > > + > > > + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > + if (!pos) > > > + return; > > > + > > > + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); > > > + > > > + /* > > > + * Since PRI is a shared resource between PF and VF, we must not > > > + * configure Outstanding Page Allocation Quota as a per device > > > + * resource in pci_enable_pri(). So use maximum value possible > > > + * as default value. > > > + */ > > > + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, max_requests); > > > + > > > + pdev->pri_reqs_alloc = max_requests; > > > + pdev->pri_cap = pos; > > > +} > > > + > > > /** > > > * pci_enable_pri - Enable PRI capability > > > * @ pdev: PCI device structure > > > * > > > * Returns 0 on success, negative value on error > > > + * > > > + * TODO: Since PRI is a shared resource between PF/VF, don't update > > > + * Outstanding Page Allocation Quota in the same API as a per device > > > + * feature. > > > */ > > > int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > > > { > > > u16 control, status; > > > u32 max_requests; > > > - int pos; > > > if (WARN_ON(pdev->pri_enabled)) > > > return -EBUSY; > > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > - if (!pos) > > > + if (!pdev->pri_cap) > > > return -EINVAL; > > > - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); > > > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); > > > if (!(status & PCI_PRI_STATUS_STOPPED)) > > > return -EBUSY; > > > - pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); > > > + pci_read_config_dword(pdev, pdev->pri_cap + PCI_PRI_MAX_REQ, > > > + &max_requests); > > > reqs = min(max_requests, reqs); > > > pdev->pri_reqs_alloc = reqs; > > > - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); > > > + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); > > The comment above says "don't update Outstanding Page Allocation > > Quota" but it looks like that's what this is doing. > > I don't want to fix it in the current patch-set. It needs further scrutiny. > That's why I have added the TODO comment for it. You don't have to fix everything in this patch set, but the comment should match what the code does. If you desire, it can go on to explain why the current behavior is incorrect. But the current comment is confusing. I think the series would read better if the patch that changed from trying to use the PRI capability on the VF (which always fails) to using the one on the PF were *first*, i.e., if this change: - pci_write_config_dword(pdev, ... + PCI_PRI_ALLOC_REQ, reqs); + pci_write_config_dword(pf, ... + PCI_PRI_ALLOC_REQ, reqs); were before adding the pri_cap cache: - pci_write_config_dword(pf, pos + PCI_PRI_ALLOC_REQ, reqs); + pci_write_config_dword(pf, pf->pri_cap + PCI_PRI_ALLOC_REQ, reqs); In the current order, you add the cache to several lines of code that are never executed. > Currently, intel-iommu and amd-iommu drivers (only users of > pci_enable_pri()) hard-codes 32 as a default value for Outstanding Page > Allocation Quota. Only exception is, amd-iommu sets this value as 1 for > devices with erratum AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE. There is no comment > or spec reference that explains why 32 is chosen as default value. Also > configuring 32 as per device max value will break for PF/VF devices since > they share the PRI interface. So without clear history, I don't want to make > any changes which might affect their functionality. > > IMO, the correct way is to configure the Outstanding Page Allocation Quota > with maximum value in pci_pri_init(). So, even if IOMMU can't handle more > than 32 page request per device, it can fail properly and it should not > affect the functionality. > > I have added proper configuration for Outstanding Page Allocation Quota in > pci_pri_init(), but it does not serve any purpose until we fix the part of > the issue in pci_enable_pri(). If you want, I can remove it for now, and add > it when fixing the issue in pci_enable_pri(). If it doesn't serve any purpose, please remove it for now. I think it's better to keep all the pieces related to that fix together so we can test them together and backport them together. > > > control = PCI_PRI_CTRL_ENABLE; > > > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > > > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > > pdev->pri_enabled = 1; > > > @@ -216,18 +254,16 @@ EXPORT_SYMBOL_GPL(pci_enable_pri); > > > void pci_disable_pri(struct pci_dev *pdev) > > > { > > > u16 control; > > > - int pos; > > > if (WARN_ON(!pdev->pri_enabled)) > > > return; > > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > - if (!pos) > > > + if (!pdev->pri_cap) > > > return; > > > - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); > > > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, &control); > > > control &= ~PCI_PRI_CTRL_ENABLE; > > > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > > > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > > pdev->pri_enabled = 0; > > > } > > > @@ -241,17 +277,15 @@ void pci_restore_pri_state(struct pci_dev *pdev) > > > { > > > u16 control = PCI_PRI_CTRL_ENABLE; > > > u32 reqs = pdev->pri_reqs_alloc; > > > - int pos; > > > if (!pdev->pri_enabled) > > > return; > > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > - if (!pos) > > > + if (!pdev->pri_cap) > > > return; > > > - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); > > > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > > > + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); > > > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > > } > > > EXPORT_SYMBOL_GPL(pci_restore_pri_state); > > > @@ -265,17 +299,15 @@ EXPORT_SYMBOL_GPL(pci_restore_pri_state); > > > int pci_reset_pri(struct pci_dev *pdev) > > > { > > > u16 control; > > > - int pos; > > > if (WARN_ON(pdev->pri_enabled)) > > > return -EBUSY; > > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > - if (!pos) > > > + if (!pdev->pri_cap) > > > return -EINVAL; > > > control = PCI_PRI_CTRL_RESET; > > > - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); > > > + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); > > > return 0; > > > } > > > @@ -410,13 +442,11 @@ EXPORT_SYMBOL_GPL(pci_pasid_features); > > > int pci_prg_resp_pasid_required(struct pci_dev *pdev) > > > { > > > u16 status; > > > - int pos; > > > - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); > > > - if (!pos) > > > + if (!pdev->pri_cap) > > > return 0; > > > - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); > > > + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); > > > if (status & PCI_PRI_STATUS_PASID) > > > return 1; > > > diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h > > > index 1a0bdaee2f32..33653d4ca94f 100644 > > > --- a/include/linux/pci-ats.h > > > +++ b/include/linux/pci-ats.h > > > @@ -6,6 +6,7 @@ > > > #ifdef CONFIG_PCI_PRI > > > +void pci_pri_init(struct pci_dev *pdev); > > I think this could be moved to drivers/pci/pci.h, since it doesn't > > need to be visible outside drivers/pci/. > Makes sense. I will move it. > > > > > int pci_enable_pri(struct pci_dev *pdev, u32 reqs); > > > void pci_disable_pri(struct pci_dev *pdev); > > > void pci_restore_pri_state(struct pci_dev *pdev); > > > @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); > > > #else /* CONFIG_PCI_PRI */ > > > +static inline void pci_pri_init(struct pci_dev *pdev) > > > +{ > > > +} > > > + > > > static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > > > { > > > return -ENODEV; > > > diff --git a/include/linux/pci.h b/include/linux/pci.h > > > index 9e700d9f9f28..56b55db099fc 100644 > > > --- a/include/linux/pci.h > > > +++ b/include/linux/pci.h > > > @@ -455,6 +455,7 @@ struct pci_dev { > > > atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ > > > #endif > > > #ifdef CONFIG_PCI_PRI > > > + u16 pri_cap; /* PRI Capability offset */ > > > u32 pri_reqs_alloc; /* Number of PRI requests allocated */ > > > #endif > > > #ifdef CONFIG_PCI_PASID > > > -- > > > 2.21.0 > > > > -- > Sathyanarayanan Kuppuswamy > Linux kernel developer >
On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > Currently, PRI Capability checks are repeated across all PRI API's. > Instead, cache the capability check result in pci_pri_init() and use it > in other PRI API's. Also, since PRI is a shared resource between PF/VF, > initialize default values for common PRI features in pci_pri_init(). > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- > include/linux/pci-ats.h | 5 +++ > include/linux/pci.h | 1 + > 3 files changed, 61 insertions(+), 25 deletions(-) > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c > index cdd936d10f68..280be911f190 100644 > --- a/drivers/pci/ats.c > +++ b/drivers/pci/ats.c > @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) > return; > > dev->ats_cap = pos; > + > + pci_pri_init(dev); > } > > /** > @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) > EXPORT_SYMBOL_GPL(pci_ats_page_aligned); > > #ifdef CONFIG_PCI_PRI > + > +void pci_pri_init(struct pci_dev *pdev) > +{ > ... > +} > diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h > index 1a0bdaee2f32..33653d4ca94f 100644 > --- a/include/linux/pci-ats.h > +++ b/include/linux/pci-ats.h > @@ -6,6 +6,7 @@ > > #ifdef CONFIG_PCI_PRI > > +void pci_pri_init(struct pci_dev *pdev); pci_pri_init() is implemented and called in drivers/pci/ats.c. Unless there's a need to call this from outside ats.c, it should be static and should not be declared here. If you can make it static, please also reorder the code so you don't need a forward declaration in ats.c. > int pci_enable_pri(struct pci_dev *pdev, u32 reqs); > void pci_disable_pri(struct pci_dev *pdev); > void pci_restore_pri_state(struct pci_dev *pdev); > @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); > > #else /* CONFIG_PCI_PRI */ > > +static inline void pci_pri_init(struct pci_dev *pdev) > +{ > +} > + > static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > { > return -ENODEV;
On Wed, Aug 14, 2019 at 11:46:57PM -0500, Bjorn Helgaas wrote: > On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > Currently, PRI Capability checks are repeated across all PRI API's. > > Instead, cache the capability check result in pci_pri_init() and use it > > in other PRI API's. Also, since PRI is a shared resource between PF/VF, > > initialize default values for common PRI features in pci_pri_init(). > > > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > --- > > drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- > > include/linux/pci-ats.h | 5 +++ > > include/linux/pci.h | 1 + > > 3 files changed, 61 insertions(+), 25 deletions(-) > > > > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c > > index cdd936d10f68..280be911f190 100644 > > --- a/drivers/pci/ats.c > > +++ b/drivers/pci/ats.c > > > @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) > > return; > > > > dev->ats_cap = pos; > > + > > + pci_pri_init(dev); > > } > > > > /** > > @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) > > EXPORT_SYMBOL_GPL(pci_ats_page_aligned); > > > > #ifdef CONFIG_PCI_PRI > > + > > +void pci_pri_init(struct pci_dev *pdev) > > +{ > > ... > > +} > > > diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h > > index 1a0bdaee2f32..33653d4ca94f 100644 > > --- a/include/linux/pci-ats.h > > +++ b/include/linux/pci-ats.h > > @@ -6,6 +6,7 @@ > > > > #ifdef CONFIG_PCI_PRI > > > > +void pci_pri_init(struct pci_dev *pdev); > > pci_pri_init() is implemented and called in drivers/pci/ats.c. Unless > there's a need to call this from outside ats.c, it should be static > and should not be declared here. > > If you can make it static, please also reorder the code so you don't > need a forward declaration in ats.c. Initially I did implement it as static function in drivers/pci/ats.c and protected the calling of pci_pri_init() with #ifdef CONFIG_PCI_PRI. But Keith did not like the implementation using #ifdefs and asked me to define empty functions. That's the reason for moving it to header file. In your previous review to this patch, since this is not used outside ats.c you asked me to move the declaraion to drivers/pci/pci.h. So I was planing to move it to drivers/pci/pci.h in next version. Let me know if you are in agreement. > > > int pci_enable_pri(struct pci_dev *pdev, u32 reqs); > > void pci_disable_pri(struct pci_dev *pdev); > > void pci_restore_pri_state(struct pci_dev *pdev); > > @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); > > > > #else /* CONFIG_PCI_PRI */ > > > > +static inline void pci_pri_init(struct pci_dev *pdev) > > +{ > > +} > > + > > static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) > > { > > return -ENODEV;
On Thu, Aug 15, 2019 at 10:30:03AM -0700, Kuppuswamy Sathyanarayanan wrote: > On Wed, Aug 14, 2019 at 11:46:57PM -0500, Bjorn Helgaas wrote: > > On Thu, Aug 01, 2019 at 05:05:59PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > > > Currently, PRI Capability checks are repeated across all PRI API's. > > > Instead, cache the capability check result in pci_pri_init() and use it > > > in other PRI API's. Also, since PRI is a shared resource between PF/VF, > > > initialize default values for common PRI features in pci_pri_init(). > > > > > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > --- > > > drivers/pci/ats.c | 80 ++++++++++++++++++++++++++++------------- > > > include/linux/pci-ats.h | 5 +++ > > > include/linux/pci.h | 1 + > > > 3 files changed, 61 insertions(+), 25 deletions(-) > > > > > > > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c > > > index cdd936d10f68..280be911f190 100644 > > > --- a/drivers/pci/ats.c > > > +++ b/drivers/pci/ats.c > > > > > @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) > > > return; > > > > > > dev->ats_cap = pos; > > > + > > > + pci_pri_init(dev); > > > } > > > > > > /** > > > @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) > > > EXPORT_SYMBOL_GPL(pci_ats_page_aligned); > > > > > > #ifdef CONFIG_PCI_PRI > > > + > > > +void pci_pri_init(struct pci_dev *pdev) > > > +{ > > > ... > > > +} > > > > > diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h > > > index 1a0bdaee2f32..33653d4ca94f 100644 > > > --- a/include/linux/pci-ats.h > > > +++ b/include/linux/pci-ats.h > > > @@ -6,6 +6,7 @@ > > > > > > #ifdef CONFIG_PCI_PRI > > > > > > +void pci_pri_init(struct pci_dev *pdev); > > > > pci_pri_init() is implemented and called in drivers/pci/ats.c. Unless > > there's a need to call this from outside ats.c, it should be static > > and should not be declared here. > > > > If you can make it static, please also reorder the code so you don't > > need a forward declaration in ats.c. > Initially I did implement it as static function in drivers/pci/ats.c > and protected the calling of pci_pri_init() with #ifdef CONFIG_PCI_PRI. > But Keith did not like the implementation using #ifdefs and asked me to > define empty functions. That's the reason for moving it to header file. Defining empty functions doesn't mean it has to be in a header file. It's only needed inside ats.c, so the whole thing should be static there. You can easily #ifdef the implementation, e.g., do the following in ats.c: static void pci_pri_init(struct pci_dev *pdev) { #ifdef CONFIG_PCI_PRI ... #endif }
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index cdd936d10f68..280be911f190 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -28,6 +28,8 @@ void pci_ats_init(struct pci_dev *dev) return; dev->ats_cap = pos; + + pci_pri_init(dev); } /** @@ -170,36 +172,72 @@ int pci_ats_page_aligned(struct pci_dev *pdev) EXPORT_SYMBOL_GPL(pci_ats_page_aligned); #ifdef CONFIG_PCI_PRI + +void pci_pri_init(struct pci_dev *pdev) +{ + u32 max_requests; + int pos; + + /* + * As per PCIe r4.0, sec 9.3.7.11, only PF is permitted to + * implement PRI and all associated VFs can only use it. + * Since PF already initialized the PRI parameters there is + * no need to proceed further. + */ + if (pdev->is_virtfn) + return; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return; + + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); + + /* + * Since PRI is a shared resource between PF and VF, we must not + * configure Outstanding Page Allocation Quota as a per device + * resource in pci_enable_pri(). So use maximum value possible + * as default value. + */ + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, max_requests); + + pdev->pri_reqs_alloc = max_requests; + pdev->pri_cap = pos; +} + /** * pci_enable_pri - Enable PRI capability * @ pdev: PCI device structure * * Returns 0 on success, negative value on error + * + * TODO: Since PRI is a shared resource between PF/VF, don't update + * Outstanding Page Allocation Quota in the same API as a per device + * feature. */ int pci_enable_pri(struct pci_dev *pdev, u32 reqs) { u16 control, status; u32 max_requests; - int pos; if (WARN_ON(pdev->pri_enabled)) return -EBUSY; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) + if (!pdev->pri_cap) return -EINVAL; - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); if (!(status & PCI_PRI_STATUS_STOPPED)) return -EBUSY; - pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); + pci_read_config_dword(pdev, pdev->pri_cap + PCI_PRI_MAX_REQ, + &max_requests); reqs = min(max_requests, reqs); pdev->pri_reqs_alloc = reqs; - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); control = PCI_PRI_CTRL_ENABLE; - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); pdev->pri_enabled = 1; @@ -216,18 +254,16 @@ EXPORT_SYMBOL_GPL(pci_enable_pri); void pci_disable_pri(struct pci_dev *pdev) { u16 control; - int pos; if (WARN_ON(!pdev->pri_enabled)) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) + if (!pdev->pri_cap) return; - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, &control); control &= ~PCI_PRI_CTRL_ENABLE; - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); pdev->pri_enabled = 0; } @@ -241,17 +277,15 @@ void pci_restore_pri_state(struct pci_dev *pdev) { u16 control = PCI_PRI_CTRL_ENABLE; u32 reqs = pdev->pri_reqs_alloc; - int pos; if (!pdev->pri_enabled) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) + if (!pdev->pri_cap) return; - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + pci_write_config_dword(pdev, pdev->pri_cap + PCI_PRI_ALLOC_REQ, reqs); + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); } EXPORT_SYMBOL_GPL(pci_restore_pri_state); @@ -265,17 +299,15 @@ EXPORT_SYMBOL_GPL(pci_restore_pri_state); int pci_reset_pri(struct pci_dev *pdev) { u16 control; - int pos; if (WARN_ON(pdev->pri_enabled)) return -EBUSY; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) + if (!pdev->pri_cap) return -EINVAL; control = PCI_PRI_CTRL_RESET; - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); + pci_write_config_word(pdev, pdev->pri_cap + PCI_PRI_CTRL, control); return 0; } @@ -410,13 +442,11 @@ EXPORT_SYMBOL_GPL(pci_pasid_features); int pci_prg_resp_pasid_required(struct pci_dev *pdev) { u16 status; - int pos; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) + if (!pdev->pri_cap) return 0; - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); + pci_read_config_word(pdev, pdev->pri_cap + PCI_PRI_STATUS, &status); if (status & PCI_PRI_STATUS_PASID) return 1; diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 1a0bdaee2f32..33653d4ca94f 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -6,6 +6,7 @@ #ifdef CONFIG_PCI_PRI +void pci_pri_init(struct pci_dev *pdev); int pci_enable_pri(struct pci_dev *pdev, u32 reqs); void pci_disable_pri(struct pci_dev *pdev); void pci_restore_pri_state(struct pci_dev *pdev); @@ -13,6 +14,10 @@ int pci_reset_pri(struct pci_dev *pdev); #else /* CONFIG_PCI_PRI */ +static inline void pci_pri_init(struct pci_dev *pdev) +{ +} + static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) { return -ENODEV; diff --git a/include/linux/pci.h b/include/linux/pci.h index 9e700d9f9f28..56b55db099fc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,6 +455,7 @@ struct pci_dev { atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ #endif #ifdef CONFIG_PCI_PRI + u16 pri_cap; /* PRI Capability offset */ u32 pri_reqs_alloc; /* Number of PRI requests allocated */ #endif #ifdef CONFIG_PCI_PASID