Message ID | 1552563922-23685-4-git-send-email-galpress@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RDMA/efa: Elastic Fabric Adapter (EFA) driver | expand |
On Thu, Mar 14, 2019 at 01:45:14PM +0200, Gal Pressman wrote: > Add EFA driver generic header file defining driver's device independent > internal data structures and definitions. > > Signed-off-by: Gal Pressman <galpress@amazon.com> > Reviewed-by: Steve Wise <swise@opengridcomputing.com> > --- > drivers/infiniband/hw/efa/efa.h | 191 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 191 insertions(+) > create mode 100644 drivers/infiniband/hw/efa/efa.h > > diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h > new file mode 100644 > index 000000000000..fac08b0f59df > --- /dev/null > +++ b/drivers/infiniband/hw/efa/efa.h > @@ -0,0 +1,191 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ > +/* > + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. > + */ > + > +#ifndef _EFA_H_ > +#define _EFA_H_ > + > +#include <linux/bitops.h> > +#include <linux/idr.h> > +#include <linux/interrupt.h> > +#include <linux/pci.h> > +#include <linux/sched.h> > + > +#include <rdma/efa-abi.h> > +#include <rdma/ib_verbs.h> > + > +#include "efa_com_cmd.h" > + > +#define DRV_MODULE_NAME "efa" > +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" > + > +#define EFA_IRQNAME_SIZE 40 > + > +/* 1 for AENQ + ADMIN */ > +#define EFA_NUM_MSIX_VEC 1 > +#define EFA_MGMNT_MSIX_VEC_IDX 0 > + > +#define efa_dbg(_dev, format, ...) \ > + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ > + __func__, ##__VA_ARGS__) > +#define efa_info(_dev, format, ...) \ > + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ > + __func__, ##__VA_ARGS__) > +#define efa_warn(_dev, format, ...) \ > + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ > + __func__, ##__VA_ARGS__) > +#define efa_err(_dev, format, ...) \ > + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ > + __func__, ##__VA_ARGS__) > +#define efa_err_rl(_dev, format, ...) 
\ > + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ > + __func__, ##__VA_ARGS__) Every time I see such debug prints, it makes me wonder if they are actually needed. Anyway, "current->pid" will print wrong output for any kernel threads. I know that you are not supporting kverbs, but I still don't think that it is the right thing to print. > + > +enum { > + EFA_DEVICE_RUNNING_BIT, Doesn't RDMA/core manage the state of device running/not running? Thanks
On 14-Mar-19 16:54, Leon Romanovsky wrote: > On Thu, Mar 14, 2019 at 01:45:14PM +0200, Gal Pressman wrote: >> Add EFA driver generic header file defining driver's device independent >> internal data structures and definitions. >> >> Signed-off-by: Gal Pressman <galpress@amazon.com> >> Reviewed-by: Steve Wise <swise@opengridcomputing.com> >> --- >> drivers/infiniband/hw/efa/efa.h | 191 ++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 191 insertions(+) >> create mode 100644 drivers/infiniband/hw/efa/efa.h >> >> diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h >> new file mode 100644 >> index 000000000000..fac08b0f59df >> --- /dev/null >> +++ b/drivers/infiniband/hw/efa/efa.h >> @@ -0,0 +1,191 @@ >> +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ >> +/* >> + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. >> + */ >> + >> +#ifndef _EFA_H_ >> +#define _EFA_H_ >> + >> +#include <linux/bitops.h> >> +#include <linux/idr.h> >> +#include <linux/interrupt.h> >> +#include <linux/pci.h> >> +#include <linux/sched.h> >> + >> +#include <rdma/efa-abi.h> >> +#include <rdma/ib_verbs.h> >> + >> +#include "efa_com_cmd.h" >> + >> +#define DRV_MODULE_NAME "efa" >> +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" >> + >> +#define EFA_IRQNAME_SIZE 40 >> + >> +/* 1 for AENQ + ADMIN */ >> +#define EFA_NUM_MSIX_VEC 1 >> +#define EFA_MGMNT_MSIX_VEC_IDX 0 >> + >> +#define efa_dbg(_dev, format, ...) \ >> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ >> + __func__, ##__VA_ARGS__) >> +#define efa_info(_dev, format, ...) \ >> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ >> + __func__, ##__VA_ARGS__) >> +#define efa_warn(_dev, format, ...) \ >> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ >> + __func__, ##__VA_ARGS__) >> +#define efa_err(_dev, format, ...) 
\ >> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ >> + __func__, ##__VA_ARGS__) >> +#define efa_err_rl(_dev, format, ...) \ >> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ >> + __func__, ##__VA_ARGS__) > > Every time when I see such debug prints, it makes me wonder if they > actually needed. Anyway "current->pid" will print wrong output for any > kernel threads. I know that you are not supporting kverbs, but still > don't think that it is right thing to print. What's the reason pid is wrong for kernel threads? I found it quite useful to see the process id while debugging, at least for userspace applications. Is there anything else we can use instead of current->pid that would work for both? > >> + >> +enum { >> + EFA_DEVICE_RUNNING_BIT, > > Doesn't RDMA/core manage the state of device running/not running? This bit is protecting the driver from handling interrupts before the device probe is finished (after request_irq is called). This could happen if the device sends a keep-alive AENQ message before probe finishes, for example.
On Thu, Mar 14, 2019 at 05:09:22PM +0200, Gal Pressman wrote: > >> +#define efa_dbg(_dev, format, ...) \ > >> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_info(_dev, format, ...) \ > >> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_warn(_dev, format, ...) \ > >> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_err(_dev, format, ...) \ > >> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_err_rl(_dev, format, ...) \ > >> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > > > > Every time when I see such debug prints, it makes me wonder if they > > actually needed. Anyway "current->pid" will print wrong output for any > > kernel threads. I know that you are not supporting kverbs, but still > > don't think that it is right thing to print. > > What's the reason pid is wrong for kernel threads? > I found it quite useful to see the process id while debugging, at least for > userspace applications. Is there anything other we can use instead of > current->pid that would work for both? Again, I'd really like it if the three new drivers could get together and have core code that does this stuff sensibly and consistently. netdev has stuff like this already. If pid logging makes sense here then it does for all.. > >> + > >> +enum { > >> + EFA_DEVICE_RUNNING_BIT, > > > > Doesn't RDMA/core manage the state of device running/not running? > > This bit is protecting the driver from handling interrupts before the device > probe is finished (after request_irq is called). This could happen if the device > sends a keep-alive AENQ message before probe finish for example. I was looking at this as well with some suspicion.. 
Generally I expect request_irq to happen only once the device is fully ready to take IRQs. What reason is there to request earlier and oddly mask the IRQ with a test_bit? Seems really weird. Jason
On Thu, Mar 14, 2019 at 05:09:22PM +0200, Gal Pressman wrote: > On 14-Mar-19 16:54, Leon Romanovsky wrote: > > On Thu, Mar 14, 2019 at 01:45:14PM +0200, Gal Pressman wrote: > >> Add EFA driver generic header file defining driver's device independent > >> internal data structures and definitions. > >> > >> Signed-off-by: Gal Pressman <galpress@amazon.com> > >> Reviewed-by: Steve Wise <swise@opengridcomputing.com> > >> --- > >> drivers/infiniband/hw/efa/efa.h | 191 ++++++++++++++++++++++++++++++++++++++++ > >> 1 file changed, 191 insertions(+) > >> create mode 100644 drivers/infiniband/hw/efa/efa.h > >> > >> diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h > >> new file mode 100644 > >> index 000000000000..fac08b0f59df > >> --- /dev/null > >> +++ b/drivers/infiniband/hw/efa/efa.h > >> @@ -0,0 +1,191 @@ > >> +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ > >> +/* > >> + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. > >> + */ > >> + > >> +#ifndef _EFA_H_ > >> +#define _EFA_H_ > >> + > >> +#include <linux/bitops.h> > >> +#include <linux/idr.h> > >> +#include <linux/interrupt.h> > >> +#include <linux/pci.h> > >> +#include <linux/sched.h> > >> + > >> +#include <rdma/efa-abi.h> > >> +#include <rdma/ib_verbs.h> > >> + > >> +#include "efa_com_cmd.h" > >> + > >> +#define DRV_MODULE_NAME "efa" > >> +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" > >> + > >> +#define EFA_IRQNAME_SIZE 40 > >> + > >> +/* 1 for AENQ + ADMIN */ > >> +#define EFA_NUM_MSIX_VEC 1 > >> +#define EFA_MGMNT_MSIX_VEC_IDX 0 > >> + > >> +#define efa_dbg(_dev, format, ...) \ > >> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_info(_dev, format, ...) \ > >> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_warn(_dev, format, ...) 
\ > >> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_err(_dev, format, ...) \ > >> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > >> +#define efa_err_rl(_dev, format, ...) \ > >> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ > >> + __func__, ##__VA_ARGS__) > > > > Every time when I see such debug prints, it makes me wonder if they > > actually needed. Anyway "current->pid" will print wrong output for any > > kernel threads. I know that you are not supporting kverbs, but still > > don't think that it is right thing to print. > > What's the reason pid is wrong for kernel threads? > I found it quite useful to see the process id while debugging, at least for > userspace applications. Is there anything other we can use instead of > current->pid that would work for both? Kernel threads are running in kernel context, but "current" points to user context. For example, any print from a workqueue will display the wrong current->pid. > > > > >> + > >> +enum { > >> + EFA_DEVICE_RUNNING_BIT, > > > > Doesn't RDMA/core manage the state of device running/not running? > > This bit is protecting the driver from handling interrupts before the device > probe is finished (after request_irq is called). This could happen if the device > sends a keep-alive AENQ message before probe finish for example. I arbitrarily checked a couple of drivers in drivers/infiniband and they don't do anything special before/after request_irq. Where can I read about such a need to protect request_irq? Thanks
On 14-Mar-19 17:31, Jason Gunthorpe wrote: > On Thu, Mar 14, 2019 at 05:09:22PM +0200, Gal Pressman wrote: >>>> +#define efa_dbg(_dev, format, ...) \ >>>> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ >>>> + __func__, ##__VA_ARGS__) >>>> +#define efa_info(_dev, format, ...) \ >>>> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ >>>> + __func__, ##__VA_ARGS__) >>>> +#define efa_warn(_dev, format, ...) \ >>>> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ >>>> + __func__, ##__VA_ARGS__) >>>> +#define efa_err(_dev, format, ...) \ >>>> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ >>>> + __func__, ##__VA_ARGS__) >>>> +#define efa_err_rl(_dev, format, ...) \ >>>> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ >>>> + __func__, ##__VA_ARGS__) >>> >>> Every time when I see such debug prints, it makes me wonder if they >>> actually needed. Anyway "current->pid" will print wrong output for any >>> kernel threads. I know that you are not supporting kverbs, but still >>> don't think that it is right thing to print. >> >> What's the reason pid is wrong for kernel threads? >> I found it quite useful to see the process id while debugging, at least for >> userspace applications. Is there anything other we can use instead of >> current->pid that would work for both? > > Again, I'd really like it if the three new drivers could get together > and have core code that does this stuff sensibly and > consistently. netdev has stuff like this already > > If pid logging makes sense here then it does for all.. I'm fine with that, is this an acceptable format for the subsystem? Should I remove the pid? > >>>> + >>>> +enum { >>>> + EFA_DEVICE_RUNNING_BIT, >>> >>> Doesn't RDMA/core manage the state of device running/not running? >> >> This bit is protecting the driver from handling interrupts before the device >> probe is finished (after request_irq is called). 
This could happen if the device >> sends a keep-alive AENQ message before probe finish for example. > > I was looking at this as well with some suspicion.. > > Generally I expect request_irq to happen only once the device is fully > ready to take IRQs. > > What reason is there to request earlier and oddly mask the IRQ with a > test_bit? Seems really weird. The bit was added to be on the safe side with the async notifications, it can probably be removed though. Will check.
On Mon, Mar 18, 2019 at 11:47:32PM +0200, Gal Pressman wrote: > On 14-Mar-19 17:31, Jason Gunthorpe wrote: > > On Thu, Mar 14, 2019 at 05:09:22PM +0200, Gal Pressman wrote: > >>>> +#define efa_dbg(_dev, format, ...) \ > >>>> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ > >>>> + __func__, ##__VA_ARGS__) > >>>> +#define efa_info(_dev, format, ...) \ > >>>> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ > >>>> + __func__, ##__VA_ARGS__) > >>>> +#define efa_warn(_dev, format, ...) \ > >>>> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ > >>>> + __func__, ##__VA_ARGS__) > >>>> +#define efa_err(_dev, format, ...) \ > >>>> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ > >>>> + __func__, ##__VA_ARGS__) > >>>> +#define efa_err_rl(_dev, format, ...) \ > >>>> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ > >>>> + __func__, ##__VA_ARGS__) > >>> > >>> Every time when I see such debug prints, it makes me wonder if they > >>> actually needed. Anyway "current->pid" will print wrong output for any > >>> kernel threads. I know that you are not supporting kverbs, but still > >>> don't think that it is right thing to print. > >> > >> What's the reason pid is wrong for kernel threads? > >> I found it quite useful to see the process id while debugging, at least for > >> userspace applications. Is there anything other we can use instead of > >> current->pid that would work for both? > > > > Again, I'd really like it if the three new drivers could get together > > and have core code that does this stuff sensibly and > > consistently. netdev has stuff like this already > > > > If pid logging makes sense here then it does for all.. > > I'm fine with that, is this an acceptable format for the subsystem? Should I > remove the pid? I haven't seen it yet, but I guess we have some core kernel code doing the pid now? Jason
On 19-Mar-19 21:33, Jason Gunthorpe wrote: > On Mon, Mar 18, 2019 at 11:47:32PM +0200, Gal Pressman wrote: >> On 14-Mar-19 17:31, Jason Gunthorpe wrote: >>> On Thu, Mar 14, 2019 at 05:09:22PM +0200, Gal Pressman wrote: >>>>>> +#define efa_dbg(_dev, format, ...) \ >>>>>> + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ >>>>>> + __func__, ##__VA_ARGS__) >>>>>> +#define efa_info(_dev, format, ...) \ >>>>>> + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ >>>>>> + __func__, ##__VA_ARGS__) >>>>>> +#define efa_warn(_dev, format, ...) \ >>>>>> + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ >>>>>> + __func__, ##__VA_ARGS__) >>>>>> +#define efa_err(_dev, format, ...) \ >>>>>> + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ >>>>>> + __func__, ##__VA_ARGS__) >>>>>> +#define efa_err_rl(_dev, format, ...) \ >>>>>> + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ >>>>>> + __func__, ##__VA_ARGS__) >>>>> >>>>> Every time when I see such debug prints, it makes me wonder if they >>>>> actually needed. Anyway "current->pid" will print wrong output for any >>>>> kernel threads. I know that you are not supporting kverbs, but still >>>>> don't think that it is right thing to print. >>>> >>>> What's the reason pid is wrong for kernel threads? >>>> I found it quite useful to see the process id while debugging, at least for >>>> userspace applications. Is there anything other we can use instead of >>>> current->pid that would work for both? >>> >>> Again, I'd really like it if the three new drivers could get together >>> and have core code that does this stuff sensibly and >>> consistently. netdev has stuff like this already >>> >>> If pid logging makes sense here then it does for all.. >> >> I'm fine with that, is this an acceptable format for the subsystem? Should I >> remove the pid? > > I haven't seen it yet, but I guess we have some core kernel code doing > the pid now? Right, I'll start working on ibdev_{err,warn,...} helpers.
On Wed, Mar 20, 2019 at 08:17:34PM +0200, Gal Pressman wrote: > > I haven't seen it yet, but I guess we have some core kernel code doing > > the pid now? > > Right, I'll start working on ibdev_{err,warn,...} helpers. Just copying the pattern in netdev is probably the thing to do, including the bit about deducing the device name under various conditions. I also think we should have an ibdev_uapi_debug() which should be called for all the cases drivers have for uverbs calls checking arguments/etc. Then we could have a global way to switch them on, perhaps. Jason
On 20-Mar-19 21:13, Jason Gunthorpe wrote: > On Wed, Mar 20, 2019 at 08:17:34PM +0200, Gal Pressman wrote: > >>> I haven't seen it yet, but I guess we have some core kernel code doing >>> the pid now? >> >> Right, I'll start working on ibdev_{err,warn,...} helpers. > > Just copying the pattern in netdev is probably the thing to do, > including the bit about deducing the device name under various > conditions > > I also think we should have a ibdev_uapi_debug() which should be > called for all the cases drivers have for uverbs calls checking > arguments/etc > > Then we could have a global way to switch them on perhaps Global switch other than dynamic debug?
On Wed, Mar 20, 2019 at 09:17:56PM +0200, Gal Pressman wrote: > On 20-Mar-19 21:13, Jason Gunthorpe wrote: > > On Wed, Mar 20, 2019 at 08:17:34PM +0200, Gal Pressman wrote: > > > >>> I haven't seen it yet, but I guess we have some core kernel code doing > >>> the pid now? > >> > >> Right, I'll start working on ibdev_{err,warn,...} helpers. > > > > Just copying the pattern in netdev is probably the thing to do, > > including the bit about deducing the device name under various > > conditions > > > > I also think we should have a ibdev_uapi_debug() which should be > > called for all the cases drivers have for uverbs calls checking > > arguments/etc > > > > Then we could have a global way to switch them on perhaps > > Global switch other than dynamic debug? Maybe a way to trigger them all at once in existing dynamic debug? I haven't looked Jason
On 20-Mar-19 21:23, Jason Gunthorpe wrote: > On Wed, Mar 20, 2019 at 09:17:56PM +0200, Gal Pressman wrote: >> On 20-Mar-19 21:13, Jason Gunthorpe wrote: >>> On Wed, Mar 20, 2019 at 08:17:34PM +0200, Gal Pressman wrote: >>> >>>>> I haven't seen it yet, but I guess we have some core kernel code doing >>>>> the pid now? >>>> >>>> Right, I'll start working on ibdev_{err,warn,...} helpers. >>> >>> Just copying the pattern in netdev is probably the thing to do, >>> including the bit about deducing the device name under various >>> conditions >>> >>> I also think we should have a ibdev_uapi_debug() which should be >>> called for all the cases drivers have for uverbs calls checking >>> arguments/etc >>> >>> Then we could have a global way to switch them on perhaps >> >> Global switch other than dynamic debug? > > Maybe a way to trigger them all at once in existing dynamic debug? > > I haven't looked This is what I have in mind (without the uapi switch): --- diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a9f29156e486..c7432902481a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -147,6 +147,49 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, static void ib_policy_change_task(struct work_struct *work); static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); +static void __ibdev_printk(const char *level, const struct ib_device *ibdev, + struct va_format *vaf) +{ + if (ibdev && ibdev->dev.parent) + dev_printk_emit(level[1] - '0', + ibdev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + vaf); + else if (ibdev) + printk("%s%s: %pV", + level, dev_name(&ibdev->dev), vaf); + else + printk("%s(NULL ib_device): %pV", level, vaf); +} + +#define define_ibdev_printk_level(func, level) \ +void func(const struct ib_device *ibdev, const char *fmt, ...) 
\ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __ibdev_printk(level, ibdev, &vaf); \ + \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func); + +define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); +define_ibdev_printk_level(ibdev_alert, KERN_ALERT); +define_ibdev_printk_level(ibdev_crit, KERN_CRIT); +define_ibdev_printk_level(ibdev_err, KERN_ERR); +define_ibdev_printk_level(ibdev_warn, KERN_WARNING); +define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); +define_ibdev_printk_level(ibdev_info, KERN_INFO); + static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5be093c5474..45a9422e0578 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -72,6 +72,21 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +__printf(2, 3) __cold +void ibdev_emerg(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_alert(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_crit(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_err(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_warn(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_notice(const struct ib_device *dev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_info(const struct ib_device *dev, const char *format, ...); + union ib_gid { u8 raw[16]; struct {
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h new file mode 100644 index 000000000000..fac08b0f59df --- /dev/null +++ b/drivers/infiniband/hw/efa/efa.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_H_ +#define _EFA_H_ + +#include <linux/bitops.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/sched.h> + +#include <rdma/efa-abi.h> +#include <rdma/ib_verbs.h> + +#include "efa_com_cmd.h" + +#define DRV_MODULE_NAME "efa" +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" + +#define EFA_IRQNAME_SIZE 40 + +/* 1 for AENQ + ADMIN */ +#define EFA_NUM_MSIX_VEC 1 +#define EFA_MGMNT_MSIX_VEC_IDX 0 + +#define efa_dbg(_dev, format, ...) \ + dev_dbg(_dev, "(pid %d) %s: " format, current->pid, \ + __func__, ##__VA_ARGS__) +#define efa_info(_dev, format, ...) \ + dev_info(_dev, "(pid %d) %s: " format, current->pid, \ + __func__, ##__VA_ARGS__) +#define efa_warn(_dev, format, ...) \ + dev_warn(_dev, "(pid %d) %s: " format, current->pid, \ + __func__, ##__VA_ARGS__) +#define efa_err(_dev, format, ...) \ + dev_err(_dev, "(pid %d) %s: " format, current->pid, \ + __func__, ##__VA_ARGS__) +#define efa_err_rl(_dev, format, ...) 
\ + dev_err_ratelimited(_dev, "(pid %d) %s: " format, current->pid, \ + __func__, ##__VA_ARGS__) + +enum { + EFA_DEVICE_RUNNING_BIT, + EFA_MSIX_ENABLED_BIT +}; + +struct efa_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[EFA_IRQNAME_SIZE]; +}; + +struct efa_sw_stats { + atomic64_t alloc_pd_err; + atomic64_t create_qp_err; + atomic64_t create_cq_err; + atomic64_t reg_mr_err; + atomic64_t alloc_ucontext_err; + atomic64_t create_ah_err; +}; + +/* Don't use anything other than atomic64 */ +struct efa_stats { + struct efa_sw_stats sw_stats; + atomic64_t keep_alive_rcvd; +}; + +struct efa_dev { + struct ib_device ibdev; + struct pci_dev *pdev; + struct efa_com_dev *edev; + struct efa_com_get_device_attr_result dev_attr; + + u64 reg_bar_addr; + u64 reg_bar_len; + u64 mem_bar_addr; + u64 mem_bar_len; + u64 db_bar_addr; + u64 db_bar_len; + u8 addr[EFA_GID_SIZE]; + u32 mtu; + + int admin_msix_vector_idx; + unsigned long state; + struct efa_irq admin_irq; + + struct efa_stats stats; +}; + +struct efa_ucontext { + struct ib_ucontext ibucontext; + /* Protects ucontext state */ + struct mutex lock; + struct list_head pending_mmaps; + u64 mmap_key; + u16 uarn; +}; + +struct efa_pd { + struct ib_pd ibpd; + u16 pdn; +}; + +struct efa_mr { + struct ib_mr ibmr; + struct ib_umem *umem; +}; + +struct efa_cq { + struct ib_cq ibcq; + struct efa_ucontext *ucontext; + dma_addr_t dma_addr; + void *cpu_addr; + size_t size; + u16 cq_idx; +}; + +struct efa_qp { + struct ib_qp ibqp; + dma_addr_t rq_dma_addr; + void *rq_cpu_addr; + size_t rq_size; + enum ib_qp_state state; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +struct efa_ah { + struct ib_ah ibah; + u16 ah; + /* dest_addr */ + u8 id[EFA_GID_SIZE]; +}; + +int efa_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata); +int efa_query_port(struct ib_device 
*ibdev, u8 port, + struct ib_port_attr *props); +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int efa_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +int efa_alloc_pd(struct ib_pd *ibpd, + struct ib_ucontext *ibucontext, + struct ib_udata *udata); +void efa_dealloc_pd(struct ib_pd *ibpd); +int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle); +int efa_destroy_qp(struct ib_qp *ibqp); +struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq); +struct ib_cq *efa_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ibucontext, + struct ib_udata *udata); +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int efa_dereg_mr(struct ib_mr *ibmr); +int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); +void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); +int efa_mmap(struct ib_ucontext *ibucontext, + struct vm_area_struct *vma); +struct ib_ah *efa_create_ah(struct ib_pd *ibpd, + struct rdma_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata); +int efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata); +enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, + u8 port_num); + +#endif /* _EFA_H_ */