diff mbox

[1/2] device: Stop requiring that struct device is embedded in struct pci_dev

Message ID 20170307003549.3872-2-bart.vanassche@sandisk.com (mailing list archive)
State Superseded
Headers show

Commit Message

Bart Van Assche March 7, 2017, 12:35 a.m. UTC
The dma mapping operations of several architectures and also of
several I/O MMU implementations need to translate a struct
device pointer into a struct pci_dev pointer. This translation
is performed by to_pci_dev(). That macro assumes that struct
device is embedded in struct pci_dev. However, that is not the
case for the device structure in struct ib_device. Since that
device structure is passed to DMA mapping operations since kernel
v4.11-rc1, introduce a pointer in struct device to make the
translation from struct device into struct pci_dev more flexible.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
---
 drivers/pci/probe.c    | 1 +
 include/linux/device.h | 5 +++++
 include/linux/pci.h    | 5 ++++-
 3 files changed, 10 insertions(+), 1 deletion(-)

Comments

Parav Pandit March 7, 2017, 2:41 a.m. UTC | #1
Hi Bart,

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Bart Van Assche
> Sent: Monday, March 6, 2017 6:36 PM
> To: Doug Ledford <dledford@redhat.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Sebastian Ott
> <sebott@linux.vnet.ibm.com>; Parav Pandit <parav@mellanox.com>; linux-
> rdma@vger.kernel.org; linux-kernel@vger.kernel.org; Bart Van Assche
> <bart.vanassche@sandisk.com>; Bjorn Helgaas <bhelgaas@google.com>;
> Benjamin Herrenschmidt <benh@kernel.crashing.org>; David Woodhouse
> <dwmw2@infradead.org>; H . Peter Anvin <hpa@zytor.com>; Ingo Molnar
> <mingo@redhat.com>; Russell King <linux@armlinux.org.uk>
> Subject: [PATCH 1/2] device: Stop requiring that struct device is embedded in
> struct pci_dev
> 
> The dma mapping operations of several architectures and also of several I/O
> MMU implementations need to translate a struct device pointer into a struct
> pci_dev pointer. This translation is performed by to_pci_dev(). That macro
> assumes that struct device is embedded in struct pci_dev. However, that is
> not the case for the device structure in struct ib_device. Since that device
> structure is passed to DMA mapping operations since kernel v4.11-rc1,
> introduce a pointer in struct device to make the translation from struct
> device into struct pci_dev more flexible.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Russell King <linux@armlinux.org.uk>
> ---
>  drivers/pci/probe.c    | 1 +
>  include/linux/device.h | 5 +++++
>  include/linux/pci.h    | 5 ++++-
>  3 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/device.h b/include/linux/device.h index
> 30c4570e928d..c18afd376d2a 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -42,6 +42,7 @@ struct fwnode_handle;
>  struct iommu_ops;
>  struct iommu_group;
>  struct iommu_fwspec;
> +struct pci_dev;
> 
>  struct bus_attribute {
>  	struct attribute	attr;
> @@ -860,6 +861,9 @@ struct dev_links_info {
>   * 		segment limitations.
>   * @dma_pools:	Dma pools (if dma'ble device).
>   * @dma_mem:	Internal for coherent mem override.
> + * @pci_dev:	PCI device associated with this device. Used by DMA
> mapping
> + *		operations on architectures that need access to PCI device
> + *		members that are not in struct device.
>   * @cma_area:	Contiguous memory area for dma allocations
>   * @archdata:	For arch-specific additions.
>   * @of_node:	Associated device tree node.
> @@ -940,6 +944,7 @@ struct device {
> 
>  	struct dma_coherent_mem	*dma_mem; /* internal for coherent
> mem
>  					     override */
> +	struct pci_dev		*pci_dev; /* for DMA mapping operations */

Compilation would break when CONFIG_PCI is not defined for some embedded platforms.
More than that, including specific pci_dev structure pointer in generic structure such as device just doesn't sound right.
I tested equivalent patch that you sent, but I don't think this is right direction to fix this bug.

>  #ifdef CONFIG_DMA_CMA
>  	struct cma *cma_area;		/* contiguous memory area for dma
>  					   allocations */
> diff --git a/include/linux/pci.h b/include/linux/pci.h index
> eb3da1a04e6c..eca790eaae20 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -409,7 +409,10 @@ static inline struct pci_dev *pci_physfn(struct
> pci_dev *dev)
> 
>  struct pci_dev *pci_alloc_dev(struct pci_bus *bus);
> 
> -#define	to_pci_dev(n) container_of(n, struct pci_dev, dev)
> +static inline struct pci_dev *to_pci_dev(const struct device *dev) {
> +	return dev->pci_dev;
> +}
>  #define for_each_pci_dev(d) while ((d = pci_get_device(PCI_ANY_ID,
> PCI_ANY_ID, d)) != NULL)
> 
>  static inline int pci_channel_offline(struct pci_dev *pdev)
> --
> 2.12.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the
> body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bart Van Assche March 7, 2017, 2:44 a.m. UTC | #2
On Tue, 2017-03-07 at 02:41 +0000, Parav Pandit wrote:
> Compilation would break when CONFIG_PCI is not defined for some embedded platforms.

> More than that, including specific pci_dev structure pointer in generic structure such as device just doesn't sound right.

> I tested equivalent patch that you sent, but I don't think this is right direction to fix this bug.


You are welcome to voice your opinion. But unless anyone proposes a better
alternative I propose to proceed with this approach.

Bart.
Parav Pandit March 7, 2017, 3:21 a.m. UTC | #3
Hi Bart,

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Bart Van Assche
> Sent: Monday, March 6, 2017 6:36 PM
> To: Doug Ledford <dledford@redhat.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Sebastian Ott
> <sebott@linux.vnet.ibm.com>; Parav Pandit <parav@mellanox.com>; linux-
> rdma@vger.kernel.org; linux-kernel@vger.kernel.org; Bart Van Assche
> <bart.vanassche@sandisk.com>; Bjorn Helgaas <bhelgaas@google.com>;
> Benjamin Herrenschmidt <benh@kernel.crashing.org>; David Woodhouse
> <dwmw2@infradead.org>; H . Peter Anvin <hpa@zytor.com>; Ingo Molnar
> <mingo@redhat.com>; Russell King <linux@armlinux.org.uk>
> Subject: [PATCH 1/2] device: Stop requiring that struct device is embedded in
> struct pci_dev
> 
> The dma mapping operations of several architectures and also of several I/O
> MMU implementations need to translate a struct device pointer into a struct
> pci_dev pointer. This translation is performed by to_pci_dev(). That macro
> assumes that struct device is embedded in struct pci_dev. However, that is
> not the case for the device structure in struct ib_device. Since that device

Why can't ib subsystem pass device structure that is embedded in pci_dev when it makes calls to dma_map APIs?
The whole point of clean up was to avoid an if() condition in hot datapath.
If we invoke dma_map_ and friend functions with right device, shouldn't it work?
That avoids the if() condition as well and avoids changing core of Linux like done in this bug fix.
I think ib_device should store the right struct device pointer that needs to go to dma_apis, rather than including pci_dev structure pointer in device core 
layer.

Pseudo example code:
struct ib_device {
	struct device *dma_device;
};

ib_dma_unmap_single() which had if(), that got removed with dma_unmap_single() with cleanup patch.

Instead of,
static inline void ib_dma_unmap_single(struct ib_device *dev,
                                       u64 addr, size_t size,
                                       enum dma_data_direction direction)
{
	dma_unmap_single(&dev->dev, addr, size, direction);
}

Why can't we do this?

static inline void ib_dma_unmap_single(struct ib_device *dev,
                                       u64 addr, size_t size,
                                       enum dma_data_direction direction)
{
	dma_unmap_single(dev->dma_device, addr, size, direction);
}

This avoids increasing all device size by 8 bytes in system.
It also clean approach where core device structure doesn't have to bother for pci_dev.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg Kroah-Hartman March 7, 2017, 4:50 a.m. UTC | #4
On Tue, Mar 07, 2017 at 02:44:28AM +0000, Bart Van Assche wrote:
> On Tue, 2017-03-07 at 02:41 +0000, Parav Pandit wrote:
> > Compilation would break when CONFIG_PCI is not defined for some embedded platforms.
> > More than that, including specific pci_dev structure pointer in generic structure such as device just doesn't sound right.
> > I tested equivalent patch that you sent, but I don't think this is right direction to fix this bug.
> 
> You are welcome to voice your opinion. But unless anyone proposes a better
> alternative I propose to proceed with this approach.

That's not how development happens.  You don't just do "here's a
something I came up with, if you don't like it, tough!"  If people
object, you need to resolve those objections, not ignore them.

Especially when you are breaking the build!!!

come on now, you know better than this.

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg Kroah-Hartman March 7, 2017, 4:52 a.m. UTC | #5
On Mon, Mar 06, 2017 at 04:35:48PM -0800, Bart Van Assche wrote:
> The dma mapping operations of several architectures and also of
> several I/O MMU implementations need to translate a struct
> device pointer into a struct pci_dev pointer. This translation
> is performed by to_pci_dev(). That macro assumes that struct
> device is embedded in struct pci_dev. However, that is not the
> case for the device structure in struct ib_device.

Then don't blindly cast it backwards!  Fix that up, an ib device should
have access to the dma structures that the PCI device it depends on has.
If not, you need to set that up properly in the IB core, don't mess with
the driver core for this at all.

Somehow all other subsystems work just fine, don't instantly think that
the driver core needs to bend to the will of the IB code, because you
are somehow "special".  Hint, you aren't :)

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit March 7, 2017, 5:08 a.m. UTC | #6
Hi Greg,

> -----Original Message-----
> From: Greg Kroah-Hartman [mailto:gregkh@linuxfoundation.org]
> Sent: Monday, March 6, 2017 10:53 PM
> To: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Doug Ledford <dledford@redhat.com>; Sebastian Ott
> <sebott@linux.vnet.ibm.com>; Parav Pandit <parav@mellanox.com>; linux-
> rdma@vger.kernel.org; linux-kernel@vger.kernel.org; Bjorn Helgaas
> <bhelgaas@google.com>; Benjamin Herrenschmidt
> <benh@kernel.crashing.org>; David Woodhouse <dwmw2@infradead.org>;
> H . Peter Anvin <hpa@zytor.com>; Ingo Molnar <mingo@redhat.com>;
> Russell King <linux@armlinux.org.uk>
> Subject: Re: [PATCH 1/2] device: Stop requiring that struct device is
> embedded in struct pci_dev
> 
> On Mon, Mar 06, 2017 at 04:35:48PM -0800, Bart Van Assche wrote:
> > The dma mapping operations of several architectures and also of
> > several I/O MMU implementations need to translate a struct device
> > pointer into a struct pci_dev pointer. This translation is performed
> > by to_pci_dev(). That macro assumes that struct device is embedded in
> > struct pci_dev. However, that is not the case for the device structure
> > in struct ib_device.
> 
> Then don't blindly cast it backwards!  Fix that up, an ib device should have
> access to the dma structures that the PCI device it depends on has.
> If not, you need to set that up properly in the IB core, don't mess with the
> driver core for this at all.
> 
I replied with pseudo code in previous reply to Bart to bring back dma_device member in the ib_device.
dma_device member was already present in near past of few weeks.
It should be able to work using it without performance impact and without touching driver core layer like in this patch.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bart Van Assche March 7, 2017, 5:13 a.m. UTC | #7
T24gVHVlLCAyMDE3LTAzLTA3IGF0IDA1OjA4ICswMDAwLCBQYXJhdiBQYW5kaXQgd3JvdGU6DQo+
IEkgcmVwbGllZCB3aXRoIHBzZXVkbyBjb2RlIGluIHByZXZpb3VzIHJlcGx5IHRvIEJhcnQgdG8g
YnJpbmcgYmFjayBkbWFfZGV2aWNlIG1lbWJlciBpbiB0aGUgaWJfZGV2aWNlLg0KPiBkbWFfZGV2
aWNlIG1lbWJlciB3YXMgYWxyZWFkeSBwcmVzZW50IGluIG5lYXIgcGFzdCBvZiBmZXcgd2Vla3Mu
DQo+IEl0IHNob3VsZCBiZSBhYmxlIHRvIHdvcmsgdXNpbmcgaXQgd2l0aG91dCBwZXJmb3JtYW5j
ZSBpbXBhY3QgYW5kIHdpdGhvdXQgdG91Y2hpbmcgZHJpdmVyIGNvcmUgbGF5ZXIgbGlrZSBpbiB0
aGlzIHBhdGNoLg0KDQpUaGF0J3MgY29uZnVzaW5nIGFuZCB3YXMgYSBzb3VyY2Ugb2YgYnVncyBh
bmQgaW5jb25zaXN0ZW5jaWVzLiBXZSBkbyBub3QNCndhbnQgdHdvIGRldmljZSBzdHJ1Y3R1cmVz
IGluIHN0cnVjdCBpYl9kZXZpY2UgKHN0cnVjdCBkZXZpY2UgZGV2IGFuZCBzdHJ1Y3QNCmRldmlj
ZSAqZG1hX2RldmljZSkuDQoNCkJhcnQu
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit March 7, 2017, 5:20 a.m. UTC | #8
Hi Bart,

> -----Original Message-----

> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-

> owner@vger.kernel.org] On Behalf Of Bart Van Assche

> Sent: Monday, March 6, 2017 11:13 PM

> To: Parav Pandit <parav@mellanox.com>; gregkh@linuxfoundation.org

> Cc: linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org;

> sebott@linux.vnet.ibm.com; linux@armlinux.org.uk; hpa@zytor.com;

> mingo@redhat.com; dwmw2@infradead.org; bhelgaas@google.com;

> dledford@redhat.com; benh@kernel.crashing.org

> Subject: Re: [PATCH 1/2] device: Stop requiring that struct device is

> embedded in struct pci_dev

> 

> On Tue, 2017-03-07 at 05:08 +0000, Parav Pandit wrote:

> > I replied with pseudo code in previous reply to Bart to bring back

> dma_device member in the ib_device.

> > dma_device member was already present in near past of few weeks.

> > It should be able to work using it without performance impact and without

> touching driver core layer like in this patch.

> 

> That's confusing and was a source of bugs and inconsistencies. We do not

> want two device structures in struct ib_device (struct device dev and struct

> device *dma_device).


device dev represents, dev structure of the ib_device.
While dma_device is for the actual device as you know.

If you look at net_device, 
It has device dev.
vendor drivers store pci_dev pointer and access device of pci_dev etc.
Every net_device driver has to do that.

Ib_device simplifies that work for ib stack by storing dma_device.
I think this is less confusing.

> 

> Bart.N     r  y   b X  ǧv ^ )޺{.n +    {  ٚ {ay ʇڙ ,j   f   h   z  w       j:+v   w j m         zZ+

> ݢj"  ! i
Bart Van Assche March 7, 2017, 4:54 p.m. UTC | #9
On Tue, 2017-03-07 at 05:52 +0100, Greg Kroah-Hartman wrote:
> Somehow all other subsystems work just fine, don't instantly think that
> the driver core needs to bend to the will of the IB code, because you
> are somehow "special".  Hint, you aren't :)

Hi Greg,

In another e-mail Parav compared IB drivers with networking drivers. But I
think that's a bad comparison: in the networking stack it's the network
driver itself that sets up and triggers DMA while in the IB stack it's the
upper layer protocol (ULP) driver that calls the functions defined in struct
dma_ops. For some IB HW drivers (hfi1, qib and rdma_rxe) the ULP driver must
use the DMA mapping operations from lib/dma-virt.c while for all other IB HW
drivers the ULP driver must use the PCI DMA mapping functions. The ib_dma_*()
functions select the right DMA mapping operations - either the PCI DMA
mapping operations or those from lib/dma-virt.c. My question to you is how we
should organize struct ib_device such that we can get rid of the ib_dma_*()
helper functions. How to make sure that the to_pci_dev() translation works
correctly for the device structure that is embedded in struct ib_device?
Should a pointer to struct pci_dev be embedded in struct device (as done in
patch 1/2 in this series) or should the struct device in ib_device be changed
into a struct pci_dev and should the pci_dev information from
/sys/devices/pci*/*/* be duplicated into the pci_dev information in struct
ib_device (/sys/devices/pci*/*/*/infiniband/*)? For the latter approach, would
there be a risk that the duplicated information becomes inconsistent?

Thanks,

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg Kroah-Hartman March 7, 2017, 5:14 p.m. UTC | #10
On Tue, Mar 07, 2017 at 04:54:58PM +0000, Bart Van Assche wrote:
> On Tue, 2017-03-07 at 05:52 +0100, Greg Kroah-Hartman wrote:
> > Somehow all other subsystems work just fine, don't instantly think that
> > the driver core needs to bend to the will of the IB code, because you
> > are somehow "special".  Hint, you aren't :)
> 
> Hi Greg,
> 
> In another e-mail Parav compared IB drivers with networking drivers.

Great, then notice that networking drivers don't need to do this type of
crud :)

> But I think that's a bad comparison: in the networking stack it's the
> network driver itself that sets up and triggers DMA while in the IB
> stack it's the upper layer protocol (ULP) driver that calls the
> functions defined in struct dma_ops. For some IB HW drivers (hfi1, qib
> and rdma_rxe) the ULP driver must
> use the DMA mapping operations from lib/dma-virt.c while for all other IB HW
> drivers the ULP driver must use the PCI DMA mapping functions. The ib_dma_*()
> functions select the right DMA mapping operations - either the PCI DMA
> mapping operations or those from lib/dma-virt.c. My question to you is how we
> should organize struct ib_device such that we can get rid of the ib_dma_*()
> helper functions. How to make sure that the to_pci_dev() translation works
> correctly for the device structure that is embedded in struct ib_device?
> Should a pointer to struct pci_dev be embedded in struct device (as done in
> patch 1/2 in this series)

I already said no to this, why do you think that it is still ok?

> or should the struct device in ib_device be changed
> into a struct pci_dev

Ick, no.

> and should the pci_dev information from /sys/devices/pci*/*/* be
> duplicated into the pci_dev information in struct ib_device
> (/sys/devices/pci*/*/*/infiniband/*)?

I don't think you really thought that one through :)

> For the latter approach, would
> there be a risk that the duplicated information becomes inconsistent?

No, it just wouldn't work :)

Why not just save off a pointer to your pci_dev in your ib_device
structure?  That way you know what the type is, and you have access to
everything you need.

But hey, I know nothing about IB and I really want to keep it that way.
You do what you want to, as long as you don't abuse the driver model,
like your patch 1/2 did.  Remember, not all the world is IB.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit March 7, 2017, 6:27 p.m. UTC | #11
> -----Original Message-----
> From: gregkh@linuxfoundation.org [mailto:gregkh@linuxfoundation.org]
> Sent: Tuesday, March 7, 2017 11:14 AM
> To: Bart Van Assche <Bart.VanAssche@sandisk.com>
> Cc: linux-kernel@vger.kernel.org; linux-rdma@vger.kernel.org; Parav Pandit
> <parav@mellanox.com>; sebott@linux.vnet.ibm.com;
> linux@armlinux.org.uk; hpa@zytor.com; mingo@redhat.com;
> dwmw2@infradead.org; bhelgaas@google.com; dledford@redhat.com;
> benh@kernel.crashing.org
> Subject: Re: [PATCH 1/2] device: Stop requiring that struct device is
> embedded in struct pci_dev
> 
> On Tue, Mar 07, 2017 at 04:54:58PM +0000, Bart Van Assche wrote:
> > On Tue, 2017-03-07 at 05:52 +0100, Greg Kroah-Hartman wrote:
> > > Somehow all other subsystems work just fine, don't instantly think
> > > that the driver core needs to bend to the will of the IB code,
> > > because you are somehow "special".  Hint, you aren't :)
> >
> > Hi Greg,
> >
> > In another e-mail Parav compared IB drivers with networking drivers.
> 
> Great, then notice that networking drivers don't need to do this type of crud
> :)
> 

Well what I compared is:
netdev has struct device and it also has underlying pci_dev based device.
ibdev has struct device and it also has underlying pci_dev based device.
So let us try to treat them in same way wherever possible and keep setup needed in ib drivers.

> > But I think that's a bad comparison: in the networking stack it's the
> > network driver itself that sets up and triggers DMA while in the IB
> > stack it's the upper layer protocol (ULP) driver that calls the
> > functions defined in struct dma_ops. For some IB HW drivers (hfi1, qib
> > and rdma_rxe) the ULP driver must use the DMA mapping operations from

DMA mapping and allocation is done in different layer for its own reason unrelated to this change.
If rxe, qib, hfi1 point to right dma_device, can't we remove the ib_dma_*()?

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt March 8, 2017, 1:52 a.m. UTC | #12
On Tue, 2017-03-07 at 05:52 +0100, Greg Kroah-Hartman wrote:
> On Mon, Mar 06, 2017 at 04:35:48PM -0800, Bart Van Assche wrote:
> > The dma mapping operations of several architectures and also of
> > several I/O MMU implementations need to translate a struct
> > device pointer into a struct pci_dev pointer. This translation
> > is performed by to_pci_dev(). That macro assumes that struct
> > device is embedded in struct pci_dev. However, that is not the
> > case for the device structure in struct ib_device.
> 
> Then don't blindly cast it backwards!  Fix that up, an ib device should
> have access to the dma structures that the PCI device it depends on has.
> If not, you need to set that up properly in the IB core, don't mess with
> the driver core for this at all.
> 
> Somehow all other subsystems work just fine, don't instantly think that
> the driver core needs to bend to the will of the IB code, because you
> are somehow "special".  Hint, you aren't :)

Right, in his case, Bart, you can either pass the the struct device for
use for DMA to the ib devices, which is easy but a bit gross, or have
the ib core provide a set of dma_ops for the ib_device that are
"wrappers" calling back to the "parent" device.

Any struct device whose dma_ops haven't been setup by the architecture
core cannot be used for DMA as-is without such "reflector" dma_ops
provided by the creator of that struct device.

The architecture core only knows about some "directly" attached things
like PCI, some cases of platform devices etc... and has no way of
setting things up for subsystem specific thigns like ib_device.

Thus the subsystem must take care of it and provide its own dma_ops
that "reflect" the calls to the original parent device that was setup
by the architecture.

Cheers,
Ben.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dfc9a2794141..60d739b59520 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1736,6 +1736,7 @@  struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
 
 	INIT_LIST_HEAD(&dev->bus_list);
 	dev->dev.type = &pci_dev_type;
+	dev->dev.pci_dev = dev;
 	dev->bus = pci_bus_get(bus);
 
 	return dev;
diff --git a/include/linux/device.h b/include/linux/device.h
index 30c4570e928d..c18afd376d2a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -42,6 +42,7 @@  struct fwnode_handle;
 struct iommu_ops;
 struct iommu_group;
 struct iommu_fwspec;
+struct pci_dev;
 
 struct bus_attribute {
 	struct attribute	attr;
@@ -860,6 +861,9 @@  struct dev_links_info {
  * 		segment limitations.
  * @dma_pools:	Dma pools (if dma'ble device).
  * @dma_mem:	Internal for coherent mem override.
+ * @pci_dev:	PCI device associated with this device. Used by DMA mapping
+ *		operations on architectures that need access to PCI device
+ *		members that are not in struct device.
  * @cma_area:	Contiguous memory area for dma allocations
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
@@ -940,6 +944,7 @@  struct device {
 
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
+	struct pci_dev		*pci_dev; /* for DMA mapping operations */
 #ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eb3da1a04e6c..eca790eaae20 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -409,7 +409,10 @@  static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
 
 struct pci_dev *pci_alloc_dev(struct pci_bus *bus);
 
-#define	to_pci_dev(n) container_of(n, struct pci_dev, dev)
+static inline struct pci_dev *to_pci_dev(const struct device *dev)
+{
+	return dev->pci_dev;
+}
 #define for_each_pci_dev(d) while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)
 
 static inline int pci_channel_offline(struct pci_dev *pdev)