Message ID | 20181210190948.6892-3-kamalheib1@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | RDMA: Add support for ib_device_ops | expand |
On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote: > This change introduce the ib_device_ops structure that defines all the > InfiniBand device operations in one place, so the code will be more > readable and clean, unlike today that the ops are mixed with ib_device > data members. > > The providers will need to define the supported operations and assign > them using ib_device_ops(), that will also make the providers code more You mean ib_set_device_ops, right? > readable and clean. > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > --- > drivers/infiniband/core/device.c | 98 +++++++++++++ > include/rdma/ib_verbs.h | 245 +++++++++++++++++++++++++++++++ > 2 files changed, 343 insertions(+) > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > index 0027b0d79b09..3589894c46b8 100644 > --- a/drivers/infiniband/core/device.c > +++ b/drivers/infiniband/core/device.c > @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, > } > EXPORT_SYMBOL(ib_get_net_dev_by_params); > > +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > +{ > +#define SET_DEVICE_OP(ptr, name) \ > + do { \ > + if (ops->name) \ > + if (!((ptr)->name)) \ > + (ptr)->name = ops->name; \ > + } while (0) > + > + SET_DEVICE_OP(dev, add_gid); > + SET_DEVICE_OP(dev, alloc_dm); > + SET_DEVICE_OP(dev, alloc_fmr); > + SET_DEVICE_OP(dev, alloc_hw_stats); > + SET_DEVICE_OP(dev, alloc_mr); > + SET_DEVICE_OP(dev, alloc_mw); > + SET_DEVICE_OP(dev, alloc_pd); > + SET_DEVICE_OP(dev, alloc_rdma_netdev); > + SET_DEVICE_OP(dev, alloc_ucontext); > + SET_DEVICE_OP(dev, alloc_xrcd); > + SET_DEVICE_OP(dev, attach_mcast); > + SET_DEVICE_OP(dev, check_mr_status); > + SET_DEVICE_OP(dev, create_ah); > + SET_DEVICE_OP(dev, create_counters); > + SET_DEVICE_OP(dev, create_cq); > + SET_DEVICE_OP(dev, create_flow); > + SET_DEVICE_OP(dev, create_flow_action_esp); > + SET_DEVICE_OP(dev, create_qp); > + SET_DEVICE_OP(dev, create_rwq_ind_table); > + SET_DEVICE_OP(dev, create_srq); > + SET_DEVICE_OP(dev, create_wq); > + SET_DEVICE_OP(dev, dealloc_dm); > + SET_DEVICE_OP(dev, dealloc_fmr); > + SET_DEVICE_OP(dev, dealloc_mw); > + SET_DEVICE_OP(dev, dealloc_pd); > + SET_DEVICE_OP(dev, dealloc_ucontext); > + SET_DEVICE_OP(dev, dealloc_xrcd); > + SET_DEVICE_OP(dev, del_gid); > + SET_DEVICE_OP(dev, dereg_mr); > + SET_DEVICE_OP(dev, destroy_ah); > + SET_DEVICE_OP(dev, destroy_counters); > + SET_DEVICE_OP(dev, destroy_cq); > + SET_DEVICE_OP(dev, destroy_flow); > + SET_DEVICE_OP(dev, destroy_flow_action); > + SET_DEVICE_OP(dev, destroy_qp); > + SET_DEVICE_OP(dev, destroy_rwq_ind_table); > + SET_DEVICE_OP(dev, destroy_srq); > + SET_DEVICE_OP(dev, destroy_wq); > + SET_DEVICE_OP(dev, detach_mcast); > + SET_DEVICE_OP(dev, disassociate_ucontext); > + SET_DEVICE_OP(dev, drain_rq); > + SET_DEVICE_OP(dev, drain_sq); > + SET_DEVICE_OP(dev, get_dev_fw_str); > + SET_DEVICE_OP(dev, get_dma_mr); > + SET_DEVICE_OP(dev, get_hw_stats); > + SET_DEVICE_OP(dev, get_link_layer); > + SET_DEVICE_OP(dev, get_netdev); > + SET_DEVICE_OP(dev, get_port_immutable); > + SET_DEVICE_OP(dev, get_vector_affinity); > + SET_DEVICE_OP(dev, get_vf_config); > + SET_DEVICE_OP(dev, get_vf_stats); > + SET_DEVICE_OP(dev, map_mr_sg); > + SET_DEVICE_OP(dev, map_phys_fmr); > + SET_DEVICE_OP(dev, mmap); > + SET_DEVICE_OP(dev, modify_ah); > + SET_DEVICE_OP(dev, modify_cq); > + SET_DEVICE_OP(dev, modify_device); > + SET_DEVICE_OP(dev, modify_flow_action_esp); > + SET_DEVICE_OP(dev, modify_port); > + SET_DEVICE_OP(dev, modify_qp); > + SET_DEVICE_OP(dev, modify_srq); > + SET_DEVICE_OP(dev, modify_wq); > + SET_DEVICE_OP(dev, peek_cq); > + SET_DEVICE_OP(dev, poll_cq); > + SET_DEVICE_OP(dev, post_recv); > + SET_DEVICE_OP(dev, post_send); > + SET_DEVICE_OP(dev, post_srq_recv); > + SET_DEVICE_OP(dev, process_mad); > + SET_DEVICE_OP(dev, query_ah); > + SET_DEVICE_OP(dev, query_device); > + SET_DEVICE_OP(dev, query_gid); > + SET_DEVICE_OP(dev, query_pkey); > + SET_DEVICE_OP(dev, query_port); > + SET_DEVICE_OP(dev, query_qp); > + SET_DEVICE_OP(dev, query_srq); > + SET_DEVICE_OP(dev, rdma_netdev_get_params); > + SET_DEVICE_OP(dev, read_counters); > + SET_DEVICE_OP(dev, reg_dm_mr); > + SET_DEVICE_OP(dev, reg_user_mr); > + SET_DEVICE_OP(dev, req_ncomp_notif); > + SET_DEVICE_OP(dev, req_notify_cq); > + SET_DEVICE_OP(dev, rereg_user_mr); > + SET_DEVICE_OP(dev, resize_cq); > + SET_DEVICE_OP(dev, set_vf_guid); > + SET_DEVICE_OP(dev, set_vf_link_state); > + SET_DEVICE_OP(dev, unmap_fmr); > +} > +EXPORT_SYMBOL(ib_set_device_ops); > + > static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { > [RDMA_NL_LS_OP_RESOLVE] = { > .doit = ib_nl_handle_resolve_resp, > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 85021451eee0..3a02c72e7620 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr { > > struct uverbs_attr_bundle; > > +/** > + * struct ib_device_ops - InfiniBand device operations > + * This structure defines all the InfiniBand device operations, providers will > + * need to define the supported operations, otherwise they will be set to null. > + */ > +struct ib_device_ops { > + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, > + const struct ib_send_wr **bad_send_wr); > + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, > + const struct ib_recv_wr **bad_recv_wr); > + void (*drain_rq)(struct ib_qp *qp); > + void (*drain_sq)(struct ib_qp *qp); > + int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); > + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); > + int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); > + int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); > + int (*post_srq_recv)(struct ib_srq *srq, > + const struct ib_recv_wr *recv_wr, > + const struct ib_recv_wr **bad_recv_wr); > + int (*process_mad)(struct ib_device *device, int process_mad_flags, > + u8 port_num, const struct ib_wc *in_wc, > + const struct ib_grh *in_grh, > + const struct ib_mad_hdr *in_mad, size_t in_mad_size, > + struct ib_mad_hdr *out_mad, size_t *out_mad_size, > + u16 *out_mad_pkey_index); > + int (*query_device)(struct ib_device *device, > + struct ib_device_attr *device_attr, > + struct ib_udata *udata); > + int (*modify_device)(struct ib_device *device, int device_modify_mask, > + struct ib_device_modify *device_modify); > + void (*get_dev_fw_str)(struct ib_device *, char *str); > + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, > + int comp_vector); > + int (*query_port)(struct ib_device *device, u8 port_num, > + struct ib_port_attr *port_attr); > + int (*modify_port)(struct ib_device *device, u8 port_num, > + int port_modify_mask, > + struct ib_port_modify *port_modify); > + /** > + * The following mandatory functions are used only at device > + * registration. Keep functions such as these at the end of this > + * structure to avoid cache line misses when accessing struct ib_device > + * in fast paths. > + */ > + int (*get_port_immutable)(struct ib_device *, u8, > + struct ib_port_immutable *); > + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, > + u8 port_num); > + /** > + * When calling get_netdev, the HW vendor's driver should return the > + * net device of device @device at port @port_num or NULL if such > + * a net device doesn't exist. The vendor driver should call dev_hold > + * on this net device. The HW vendor's device driver must guarantee > + * that this function returns NULL before the net device has finished > + * NETDEV_UNREGISTER state. > + */ > + struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); > + /** > + * rdma netdev operation > + * > + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params > + * must return -EOPNOTSUPP if it doesn't support the specified type. > + */ > + struct net_device *(*alloc_rdma_netdev)( > + struct ib_device *device, > + u8 port_num, > + enum rdma_netdev_t type, > + const char *name, > + unsigned char name_assign_type, > + void (*setup)(struct net_device *)); > + > + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, > + enum rdma_netdev_t type, > + struct rdma_netdev_alloc_params *params); > + /** > + * query_gid should be return GID value for @device, when @port_num > + * link layer is either IB or iWarp. It is no-op if @port_num port > + * is RoCE link layer. > + */ > + int (*query_gid)(struct ib_device *device, u8 port_num, int index, > + union ib_gid *gid); > + /** > + * When calling add_gid, the HW vendor's driver should add the gid > + * of device of port at gid index available at @attr. Meta-info of > + * that gid (for example, the network device related to this gid) is > + * available at @attr. @context allows the HW vendor driver to store > + * extra information together with a GID entry. The HW vendor driver may > + * allocate memory to contain this information and store it in @context > + * when a new GID entry is written to. Params are consistent until the > + * next call of add_gid or delete_gid. The function should return 0 on > + * success or error otherwise. The function could be called > + * concurrently for different ports. This function is only called when > + * roce_gid_table is used. > + */ > + int (*add_gid)(const struct ib_gid_attr *attr, void **context); > + /** > + * When calling del_gid, the HW vendor's driver should delete the > + * gid of device @device at gid index gid_index of port port_num > + * available in @attr. > + * Upon the deletion of a GID entry, the HW vendor must free any > + * allocated memory. The caller will clear @context afterwards. > + * This function is only called when roce_gid_table is used. > + */ > + int (*del_gid)(const struct ib_gid_attr *attr, void **context); > + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, > + u16 *pkey); > + struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, > + struct ib_udata *udata); > + int (*dealloc_ucontext)(struct ib_ucontext *context); > + int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); > + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); > + struct ib_pd *(*alloc_pd)(struct ib_device *device, > + struct ib_ucontext *context, > + struct ib_udata *udata); > + int (*dealloc_pd)(struct ib_pd *pd); > + struct ib_ah *(*create_ah)(struct ib_pd *pd, > + struct rdma_ah_attr *ah_attr, > + struct ib_udata *udata); > + int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > + int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > + int (*destroy_ah)(struct ib_ah *ah); > + struct ib_srq *(*create_srq)(struct ib_pd *pd, > + struct ib_srq_init_attr *srq_init_attr, > + struct ib_udata *udata); > + int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, > + enum ib_srq_attr_mask srq_attr_mask, > + struct ib_udata *udata); > + int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); > + int (*destroy_srq)(struct ib_srq *srq); > + struct ib_qp *(*create_qp)(struct ib_pd *pd, > + struct ib_qp_init_attr *qp_init_attr, > + struct ib_udata *udata); > + int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > + int qp_attr_mask, struct ib_udata *udata); > + int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); > + int (*destroy_qp)(struct ib_qp *qp); > + struct ib_cq *(*create_cq)(struct ib_device *device, > + const struct ib_cq_init_attr *attr, > + struct ib_ucontext *context, > + struct ib_udata *udata); > + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); > + int (*destroy_cq)(struct ib_cq *cq); > + int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); > + struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); > + struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, > + u64 virt_addr, int mr_access_flags, > + struct ib_udata *udata); > + int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, > + u64 virt_addr, int mr_access_flags, > + struct ib_pd *pd, struct ib_udata *udata); > + int (*dereg_mr)(struct ib_mr *mr); > + struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, > + u32 max_num_sg); > + int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, > + unsigned int *sg_offset); > + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, > + struct ib_mr_status *mr_status); > + struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, > + struct ib_udata *udata); > + int (*dealloc_mw)(struct ib_mw *mw); > + struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, > + struct ib_fmr_attr *fmr_attr); > + int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, > + u64 iova); > + int (*unmap_fmr)(struct list_head *fmr_list); > + int (*dealloc_fmr)(struct ib_fmr *fmr); > + int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > + int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > + struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, > + struct ib_ucontext *ucontext, > + struct ib_udata *udata); > + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); > + struct ib_flow *(*create_flow)(struct ib_qp *qp, > + struct ib_flow_attr *flow_attr, > + int domain, struct ib_udata *udata); > + int (*destroy_flow)(struct ib_flow *flow_id); > + struct ib_flow_action *(*create_flow_action_esp)( > + struct ib_device *device, > + const struct ib_flow_action_attrs_esp *attr, > + struct uverbs_attr_bundle *attrs); > + int (*destroy_flow_action)(struct ib_flow_action *action); > + int (*modify_flow_action_esp)( > + struct ib_flow_action *action, > + const struct ib_flow_action_attrs_esp *attr, > + struct uverbs_attr_bundle *attrs); > + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, > + int state); > + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, > + struct ifla_vf_info *ivf); > + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, > + struct ifla_vf_stats *stats); > + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, > + int type); > + struct ib_wq *(*create_wq)(struct ib_pd *pd, > + struct ib_wq_init_attr *init_attr, > + struct ib_udata *udata); > + int (*destroy_wq)(struct ib_wq *wq); > + int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, > + u32 wq_attr_mask, struct ib_udata *udata); > + struct ib_rwq_ind_table *(*create_rwq_ind_table)( > + struct ib_device *device, > + struct ib_rwq_ind_table_init_attr *init_attr, > + struct ib_udata *udata); > + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); > + struct ib_dm *(*alloc_dm)(struct ib_device *device, > + struct ib_ucontext *context, > + struct ib_dm_alloc_attr *attr, > + struct uverbs_attr_bundle *attrs); > + int (*dealloc_dm)(struct ib_dm *dm); > + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, > + struct ib_dm_mr_attr *attr, > + struct uverbs_attr_bundle *attrs); > + struct ib_counters *(*create_counters)( > + struct ib_device *device, struct uverbs_attr_bundle *attrs); > + int (*destroy_counters)(struct ib_counters *counters); > + int (*read_counters)(struct ib_counters *counters, > + struct ib_counters_read_attr *counters_read_attr, > + struct uverbs_attr_bundle *attrs); > + /** > + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the > + * driver initialized data. The struct is kfree()'ed by the sysfs > + * core when the device is removed. A lifespan of -1 in the return > + * struct tells the core to set a default lifespan. > + */ > + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, > + u8 port_num); > + /** > + * get_hw_stats - Fill in the counter value(s) in the stats struct. > + * @index - The index in the value array we wish to have updated, or > + * num_counters if we want all stats updated > + * Return codes - > + * < 0 - Error, no counters updated > + * index - Updated the single counter pointed to by index > + * num_counters - Updated all counters (will reset the timestamp > + * and prevent further calls for lifespan milliseconds) > + * Drivers are allowed to update all counters in leiu of just the > + * one given in index at their option > + */ > + int (*get_hw_stats)(struct ib_device *device, > + struct rdma_hw_stats *stats, u8 port, int index); > +}; > + So now we have each function declared both in ib_device and in ib_device_ops? Will it be easier to remove all declarations from ib_device and add new member struct ib_device_ops *ops to ib_device this way objective will be achieved (code more readable) while still preserving single place of declaration? > struct ib_device { > /* Do not access @dma_device directly from ULP nor from HW drivers. */ > struct device *dma_device; > @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client); > void *ib_get_client_data(struct ib_device *device, struct ib_client *client); > void ib_set_client_data(struct ib_device *device, struct ib_client *client, > void *data); > +void ib_set_device_ops(struct ib_device *device, > + const struct ib_device_ops *ops); > > #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) > int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, > -- > 2.19.2 >
On Mon, Dec 10, 2018 at 10:46:48PM +0200, Yuval Shaia wrote: > On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote: > > This change introduce the ib_device_ops structure that defines all the > > InfiniBand device operations in one place, so the code will be more > > readable and clean, unlike today that the ops are mixed with ib_device > > data members. > > > > The providers will need to define the supported operations and assign > > them using ib_device_ops(), that will also make the providers code more > > You mean ib_set_device_ops, right? Correct, it should be ib_set_device_ops(). Jason, Please let me know if I need to re-send this patch with a fixes commit message? > > > readable and clean. > > > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > > --- > > drivers/infiniband/core/device.c | 98 +++++++++++++ > > include/rdma/ib_verbs.h | 245 +++++++++++++++++++++++++++++++ > > 2 files changed, 343 insertions(+) > > > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > > index 0027b0d79b09..3589894c46b8 100644 > > --- a/drivers/infiniband/core/device.c > > +++ b/drivers/infiniband/core/device.c > > @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, > > } > > EXPORT_SYMBOL(ib_get_net_dev_by_params); > > > > +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > > +{ > > +#define SET_DEVICE_OP(ptr, name) \ > > + do { \ > > + if (ops->name) \ > > + if (!((ptr)->name)) \ > > + (ptr)->name = ops->name; \ > > + } while (0) > > + > > + SET_DEVICE_OP(dev, add_gid); > > + SET_DEVICE_OP(dev, alloc_dm); > > + SET_DEVICE_OP(dev, alloc_fmr); > > + SET_DEVICE_OP(dev, alloc_hw_stats); > > + SET_DEVICE_OP(dev, alloc_mr); > > + SET_DEVICE_OP(dev, alloc_mw); > > + SET_DEVICE_OP(dev, alloc_pd); > > + SET_DEVICE_OP(dev, alloc_rdma_netdev); > > + SET_DEVICE_OP(dev, alloc_ucontext); > > + SET_DEVICE_OP(dev, alloc_xrcd); > > + SET_DEVICE_OP(dev, attach_mcast); > > + SET_DEVICE_OP(dev, check_mr_status); > > + SET_DEVICE_OP(dev, create_ah); > > + SET_DEVICE_OP(dev, create_counters); > > + SET_DEVICE_OP(dev, create_cq); > > + SET_DEVICE_OP(dev, create_flow); > > + SET_DEVICE_OP(dev, create_flow_action_esp); > > + SET_DEVICE_OP(dev, create_qp); > > + SET_DEVICE_OP(dev, create_rwq_ind_table); > > + SET_DEVICE_OP(dev, create_srq); > > + SET_DEVICE_OP(dev, create_wq); > > + SET_DEVICE_OP(dev, dealloc_dm); > > + SET_DEVICE_OP(dev, dealloc_fmr); > > + SET_DEVICE_OP(dev, dealloc_mw); > > + SET_DEVICE_OP(dev, dealloc_pd); > > + SET_DEVICE_OP(dev, dealloc_ucontext); > > + SET_DEVICE_OP(dev, dealloc_xrcd); > > + SET_DEVICE_OP(dev, del_gid); > > + SET_DEVICE_OP(dev, dereg_mr); > > + SET_DEVICE_OP(dev, destroy_ah); > > + SET_DEVICE_OP(dev, destroy_counters); > > + SET_DEVICE_OP(dev, destroy_cq); > > + SET_DEVICE_OP(dev, destroy_flow); > > + SET_DEVICE_OP(dev, destroy_flow_action); > > + SET_DEVICE_OP(dev, destroy_qp); > > + SET_DEVICE_OP(dev, destroy_rwq_ind_table); > > + SET_DEVICE_OP(dev, destroy_srq); > > + SET_DEVICE_OP(dev, destroy_wq); > > + SET_DEVICE_OP(dev, detach_mcast); > > + SET_DEVICE_OP(dev, disassociate_ucontext); > > + SET_DEVICE_OP(dev, drain_rq); > > + SET_DEVICE_OP(dev, drain_sq); > > + SET_DEVICE_OP(dev, get_dev_fw_str); > > + SET_DEVICE_OP(dev, get_dma_mr); > > + SET_DEVICE_OP(dev, get_hw_stats); > > + SET_DEVICE_OP(dev, get_link_layer); > > + SET_DEVICE_OP(dev, get_netdev); > > + SET_DEVICE_OP(dev, get_port_immutable); > > + SET_DEVICE_OP(dev, get_vector_affinity); > > + SET_DEVICE_OP(dev, get_vf_config); > > + SET_DEVICE_OP(dev, get_vf_stats); > > + SET_DEVICE_OP(dev, map_mr_sg); > > + SET_DEVICE_OP(dev, map_phys_fmr); > > + SET_DEVICE_OP(dev, mmap); > > + SET_DEVICE_OP(dev, modify_ah); > > + SET_DEVICE_OP(dev, modify_cq); > > + SET_DEVICE_OP(dev, modify_device); > > + SET_DEVICE_OP(dev, modify_flow_action_esp); > > + SET_DEVICE_OP(dev, modify_port); > > + SET_DEVICE_OP(dev, modify_qp); > > + SET_DEVICE_OP(dev, modify_srq); > > + SET_DEVICE_OP(dev, modify_wq); > > + SET_DEVICE_OP(dev, peek_cq); > > + SET_DEVICE_OP(dev, poll_cq); > > + SET_DEVICE_OP(dev, post_recv); > > + SET_DEVICE_OP(dev, post_send); > > + SET_DEVICE_OP(dev, post_srq_recv); > > + SET_DEVICE_OP(dev, process_mad); > > + SET_DEVICE_OP(dev, query_ah); > > + SET_DEVICE_OP(dev, query_device); > > + SET_DEVICE_OP(dev, query_gid); > > + SET_DEVICE_OP(dev, query_pkey); > > + SET_DEVICE_OP(dev, query_port); > > + SET_DEVICE_OP(dev, query_qp); > > + SET_DEVICE_OP(dev, query_srq); > > + SET_DEVICE_OP(dev, rdma_netdev_get_params); > > + SET_DEVICE_OP(dev, read_counters); > > + SET_DEVICE_OP(dev, reg_dm_mr); > > + SET_DEVICE_OP(dev, reg_user_mr); > > + SET_DEVICE_OP(dev, req_ncomp_notif); > > + SET_DEVICE_OP(dev, req_notify_cq); > > + SET_DEVICE_OP(dev, rereg_user_mr); > > + SET_DEVICE_OP(dev, resize_cq); > > + SET_DEVICE_OP(dev, set_vf_guid); > > + SET_DEVICE_OP(dev, set_vf_link_state); > > + SET_DEVICE_OP(dev, unmap_fmr); > > +} > > +EXPORT_SYMBOL(ib_set_device_ops); > > + > > static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { > > [RDMA_NL_LS_OP_RESOLVE] = { > > .doit = ib_nl_handle_resolve_resp, > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > > index 85021451eee0..3a02c72e7620 100644 > > --- a/include/rdma/ib_verbs.h > > +++ b/include/rdma/ib_verbs.h > > @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr { > > > > struct uverbs_attr_bundle; > > > > +/** > > + * struct ib_device_ops - InfiniBand device operations > > + * This structure defines all the InfiniBand device operations, providers will > > + * need to define the supported operations, otherwise they will be set to null. > > + */ > > +struct ib_device_ops { > > + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, > > + const struct ib_send_wr **bad_send_wr); > > + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, > > + const struct ib_recv_wr **bad_recv_wr); > > + void (*drain_rq)(struct ib_qp *qp); > > + void (*drain_sq)(struct ib_qp *qp); > > + int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); > > + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); > > + int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); > > + int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); > > + int (*post_srq_recv)(struct ib_srq *srq, > > + const struct ib_recv_wr *recv_wr, > > + const struct ib_recv_wr **bad_recv_wr); > > + int (*process_mad)(struct ib_device *device, int process_mad_flags, > > + u8 port_num, const struct ib_wc *in_wc, > > + const struct ib_grh *in_grh, > > + const struct ib_mad_hdr *in_mad, size_t in_mad_size, > > + struct ib_mad_hdr *out_mad, size_t *out_mad_size, > > + u16 *out_mad_pkey_index); > > + int (*query_device)(struct ib_device *device, > > + struct ib_device_attr *device_attr, > > + struct ib_udata *udata); > > + int (*modify_device)(struct ib_device *device, int device_modify_mask, > > + struct ib_device_modify *device_modify); > > + void (*get_dev_fw_str)(struct ib_device *, char *str); > > + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, > > + int comp_vector); > > + int (*query_port)(struct ib_device *device, u8 port_num, > > + struct ib_port_attr *port_attr); > > + int (*modify_port)(struct ib_device *device, u8 port_num, > > + int port_modify_mask, > > + struct ib_port_modify *port_modify); > > + /** > > + * The following mandatory functions are used only at device > > + * registration. Keep functions such as these at the end of this > > + * structure to avoid cache line misses when accessing struct ib_device > > + * in fast paths. > > + */ > > + int (*get_port_immutable)(struct ib_device *, u8, > > + struct ib_port_immutable *); > > + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, > > + u8 port_num); > > + /** > > + * When calling get_netdev, the HW vendor's driver should return the > > + * net device of device @device at port @port_num or NULL if such > > + * a net device doesn't exist. The vendor driver should call dev_hold > > + * on this net device. The HW vendor's device driver must guarantee > > + * that this function returns NULL before the net device has finished > > + * NETDEV_UNREGISTER state. > > + */ > > + struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); > > + /** > > + * rdma netdev operation > > + * > > + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params > > + * must return -EOPNOTSUPP if it doesn't support the specified type. > > + */ > > + struct net_device *(*alloc_rdma_netdev)( > > + struct ib_device *device, > > + u8 port_num, > > + enum rdma_netdev_t type, > > + const char *name, > > + unsigned char name_assign_type, > > + void (*setup)(struct net_device *)); > > + > > + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, > > + enum rdma_netdev_t type, > > + struct rdma_netdev_alloc_params *params); > > + /** > > + * query_gid should be return GID value for @device, when @port_num > > + * link layer is either IB or iWarp. It is no-op if @port_num port > > + * is RoCE link layer. > > + */ > > + int (*query_gid)(struct ib_device *device, u8 port_num, int index, > > + union ib_gid *gid); > > + /** > > + * When calling add_gid, the HW vendor's driver should add the gid > > + * of device of port at gid index available at @attr. Meta-info of > > + * that gid (for example, the network device related to this gid) is > > + * available at @attr. @context allows the HW vendor driver to store > > + * extra information together with a GID entry. The HW vendor driver may > > + * allocate memory to contain this information and store it in @context > > + * when a new GID entry is written to. Params are consistent until the > > + * next call of add_gid or delete_gid. The function should return 0 on > > + * success or error otherwise. The function could be called > > + * concurrently for different ports. This function is only called when > > + * roce_gid_table is used. > > + */ > > + int (*add_gid)(const struct ib_gid_attr *attr, void **context); > > + /** > > + * When calling del_gid, the HW vendor's driver should delete the > > + * gid of device @device at gid index gid_index of port port_num > > + * available in @attr. > > + * Upon the deletion of a GID entry, the HW vendor must free any > > + * allocated memory. The caller will clear @context afterwards. > > + * This function is only called when roce_gid_table is used. > > + */ > > + int (*del_gid)(const struct ib_gid_attr *attr, void **context); > > + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, > > + u16 *pkey); > > + struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, > > + struct ib_udata *udata); > > + int (*dealloc_ucontext)(struct ib_ucontext *context); > > + int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); > > + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); > > + struct ib_pd *(*alloc_pd)(struct ib_device *device, > > + struct ib_ucontext *context, > > + struct ib_udata *udata); > > + int (*dealloc_pd)(struct ib_pd *pd); > > + struct ib_ah *(*create_ah)(struct ib_pd *pd, > > + struct rdma_ah_attr *ah_attr, > > + struct ib_udata *udata); > > + int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > > + int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > > + int (*destroy_ah)(struct ib_ah *ah); > > + struct ib_srq *(*create_srq)(struct ib_pd *pd, > > + struct ib_srq_init_attr *srq_init_attr, > > + struct ib_udata *udata); > > + int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, > > + enum ib_srq_attr_mask srq_attr_mask, > > + struct ib_udata *udata); > > + int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); > > + int (*destroy_srq)(struct ib_srq *srq); > > + struct ib_qp *(*create_qp)(struct ib_pd *pd, > > + struct ib_qp_init_attr *qp_init_attr, > > + struct ib_udata *udata); > > + int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > > + int qp_attr_mask, struct ib_udata *udata); > > + int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > > + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); > > + int (*destroy_qp)(struct ib_qp *qp); > > + struct ib_cq *(*create_cq)(struct ib_device *device, > > + const struct ib_cq_init_attr *attr, > > + struct ib_ucontext *context, > > + struct ib_udata *udata); > > + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); > > + int (*destroy_cq)(struct ib_cq *cq); > > + int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); > > + struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); > > + struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, > > + u64 virt_addr, int mr_access_flags, > > + struct ib_udata *udata); > > + int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, > > + u64 virt_addr, int mr_access_flags, > > + struct ib_pd *pd, struct ib_udata *udata); > > + int (*dereg_mr)(struct ib_mr *mr); > > + struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, > > + u32 max_num_sg); > > + int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, > > + unsigned int *sg_offset); > > + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, > > + struct ib_mr_status *mr_status); > > + struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, > > + struct ib_udata *udata); > > + int (*dealloc_mw)(struct ib_mw *mw); > > + struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, > > + struct ib_fmr_attr *fmr_attr); > > + int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, > > + u64 iova); > > + int (*unmap_fmr)(struct list_head *fmr_list); > > + int (*dealloc_fmr)(struct ib_fmr *fmr); > > + int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > > + int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > > + struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, > > + struct ib_ucontext *ucontext, > > + struct ib_udata *udata); > > + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); > > + struct ib_flow *(*create_flow)(struct ib_qp *qp, > > + struct ib_flow_attr *flow_attr, > > + int domain, struct ib_udata *udata); > > + int (*destroy_flow)(struct ib_flow *flow_id); > > + struct ib_flow_action *(*create_flow_action_esp)( > > + struct ib_device *device, > > + const struct ib_flow_action_attrs_esp *attr, > > + struct uverbs_attr_bundle *attrs); > > + int (*destroy_flow_action)(struct ib_flow_action *action); > > + int (*modify_flow_action_esp)( > > + struct ib_flow_action *action, > > + const struct ib_flow_action_attrs_esp *attr, > > + struct uverbs_attr_bundle *attrs); > > + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, > > + int state); > > + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, > > + struct ifla_vf_info *ivf); > > + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, > > + struct ifla_vf_stats *stats); > > + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, > > + int type); > > + struct ib_wq *(*create_wq)(struct ib_pd *pd, > > + struct ib_wq_init_attr *init_attr, > > + struct ib_udata *udata); > > + int (*destroy_wq)(struct ib_wq *wq); > > + int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, > > + u32 wq_attr_mask, struct ib_udata *udata); > > + struct ib_rwq_ind_table *(*create_rwq_ind_table)( > > + struct ib_device *device, > > + struct ib_rwq_ind_table_init_attr *init_attr, > > + struct ib_udata *udata); > > + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); > > + struct ib_dm *(*alloc_dm)(struct ib_device *device, > > + struct ib_ucontext *context, > > + struct ib_dm_alloc_attr *attr, > > + struct uverbs_attr_bundle *attrs); > > + int (*dealloc_dm)(struct ib_dm *dm); > > + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, > > + struct ib_dm_mr_attr *attr, > > + struct uverbs_attr_bundle *attrs); > > + struct ib_counters *(*create_counters)( > > + struct ib_device *device, struct uverbs_attr_bundle *attrs); > > + int (*destroy_counters)(struct ib_counters *counters); > > + int (*read_counters)(struct ib_counters *counters, > > + struct ib_counters_read_attr *counters_read_attr, > > + struct uverbs_attr_bundle *attrs); > > + /** > > + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the > > + * driver initialized data. The struct is kfree()'ed by the sysfs > > + * core when the device is removed. A lifespan of -1 in the return > > + * struct tells the core to set a default lifespan. > > + */ > > + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, > > + u8 port_num); > > + /** > > + * get_hw_stats - Fill in the counter value(s) in the stats struct. > > + * @index - The index in the value array we wish to have updated, or > > + * num_counters if we want all stats updated > > + * Return codes - > > + * < 0 - Error, no counters updated > > + * index - Updated the single counter pointed to by index > > + * num_counters - Updated all counters (will reset the timestamp > > + * and prevent further calls for lifespan milliseconds) > > + * Drivers are allowed to update all counters in leiu of just the > > + * one given in index at their option > > + */ > > + int (*get_hw_stats)(struct ib_device *device, > > + struct rdma_hw_stats *stats, u8 port, int index); > > +}; > > + > > So now we have each function declared both in ib_device and in > ib_device_ops? No, the last patch in the patch set removes the functions within the ib_device. > > Will it be easier to remove all declarations from ib_device and add new > member struct ib_device_ops *ops to ib_device this way objective will be > achieved (code more readable) while still preserving single place of > declaration? Please take a look on the last patch in this patch set, it removes the functions within the ib_device and add the ib_device_ops as a member to the ib_device, this was done to make it easier to review this huge patch set. > > > struct ib_device { > > /* Do not access @dma_device directly from ULP nor from HW drivers. */ > > struct device *dma_device; > > @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client); > > void *ib_get_client_data(struct ib_device *device, struct ib_client *client); > > void ib_set_client_data(struct ib_device *device, struct ib_client *client, > > void *data); > > +void ib_set_device_ops(struct ib_device *device, > > + const struct ib_device_ops *ops); > > > > #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) > > int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, > > -- > > 2.19.2 > >
On Mon, Dec 10, 2018 at 11:14:17PM +0200, Kamal Heib wrote: > On Mon, Dec 10, 2018 at 10:46:48PM +0200, Yuval Shaia wrote: > > On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote: > > > This change introduce the ib_device_ops structure that defines all the > > > InfiniBand device operations in one place, so the code will be more > > > readable and clean, unlike today that the ops are mixed with ib_device > > > data members. > > > > > > The providers will need to define the supported operations and assign > > > them using ib_device_ops(), that will also make the providers code more > > > > You mean ib_set_device_ops, right? > > Correct, it should be ib_set_device_ops(). > > Jason, Please let me know if I need to re-send this patch with a fixes > commit message? > > > > > > readable and clean. > > > > > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > > > --- > > > drivers/infiniband/core/device.c | 98 +++++++++++++ > > > include/rdma/ib_verbs.h | 245 +++++++++++++++++++++++++++++++ > > > 2 files changed, 343 insertions(+) > > > > > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > > > index 0027b0d79b09..3589894c46b8 100644 > > > --- a/drivers/infiniband/core/device.c > > > +++ b/drivers/infiniband/core/device.c > > > @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, > > > } > > > EXPORT_SYMBOL(ib_get_net_dev_by_params); > > > > > > +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > > > +{ > > > +#define SET_DEVICE_OP(ptr, name) \ > > > + do { \ > > > + if (ops->name) \ > > > + if (!((ptr)->name)) \ > > > + (ptr)->name = ops->name; \ > > > + } while (0) > > > + > > > + SET_DEVICE_OP(dev, add_gid); > > > + SET_DEVICE_OP(dev, alloc_dm); > > > + SET_DEVICE_OP(dev, alloc_fmr); > > > + SET_DEVICE_OP(dev, alloc_hw_stats); > > > + SET_DEVICE_OP(dev, alloc_mr); > > > + SET_DEVICE_OP(dev, alloc_mw); > > > + SET_DEVICE_OP(dev, alloc_pd); > > > + SET_DEVICE_OP(dev, alloc_rdma_netdev); > > > + SET_DEVICE_OP(dev, alloc_ucontext); > > > + SET_DEVICE_OP(dev, alloc_xrcd); > > > + SET_DEVICE_OP(dev, attach_mcast); > > > + SET_DEVICE_OP(dev, check_mr_status); > > > + SET_DEVICE_OP(dev, create_ah); > > > + SET_DEVICE_OP(dev, create_counters); > > > + SET_DEVICE_OP(dev, create_cq); > > > + SET_DEVICE_OP(dev, create_flow); > > > + SET_DEVICE_OP(dev, create_flow_action_esp); > > > + SET_DEVICE_OP(dev, create_qp); > > > + SET_DEVICE_OP(dev, create_rwq_ind_table); > > > + SET_DEVICE_OP(dev, create_srq); > > > + SET_DEVICE_OP(dev, create_wq); > > > + SET_DEVICE_OP(dev, dealloc_dm); > > > + SET_DEVICE_OP(dev, dealloc_fmr); > > > + SET_DEVICE_OP(dev, dealloc_mw); > > > + SET_DEVICE_OP(dev, dealloc_pd); > > > + SET_DEVICE_OP(dev, dealloc_ucontext); > > > + SET_DEVICE_OP(dev, dealloc_xrcd); > > > + SET_DEVICE_OP(dev, del_gid); > > > + SET_DEVICE_OP(dev, dereg_mr); > > > + SET_DEVICE_OP(dev, destroy_ah); > > > + SET_DEVICE_OP(dev, destroy_counters); > > > + SET_DEVICE_OP(dev, destroy_cq); > > > + SET_DEVICE_OP(dev, destroy_flow); > > > + SET_DEVICE_OP(dev, destroy_flow_action); > > > + SET_DEVICE_OP(dev, destroy_qp); > > > + SET_DEVICE_OP(dev, destroy_rwq_ind_table); > > > + SET_DEVICE_OP(dev, destroy_srq); > > > + SET_DEVICE_OP(dev, destroy_wq); > > > + SET_DEVICE_OP(dev, detach_mcast); > > > + SET_DEVICE_OP(dev, disassociate_ucontext); > > > + SET_DEVICE_OP(dev, drain_rq); > > > + SET_DEVICE_OP(dev, drain_sq); > > > + SET_DEVICE_OP(dev, get_dev_fw_str); > > > + SET_DEVICE_OP(dev, get_dma_mr); > > > + SET_DEVICE_OP(dev, get_hw_stats); > > > + SET_DEVICE_OP(dev, get_link_layer); > > > + SET_DEVICE_OP(dev, get_netdev); > > > + SET_DEVICE_OP(dev, get_port_immutable); > > > + SET_DEVICE_OP(dev, get_vector_affinity); > > > + SET_DEVICE_OP(dev, get_vf_config); > > > + SET_DEVICE_OP(dev, get_vf_stats); > > > + SET_DEVICE_OP(dev, map_mr_sg); > > > + SET_DEVICE_OP(dev, map_phys_fmr); > > > + SET_DEVICE_OP(dev, mmap); > > > + SET_DEVICE_OP(dev, modify_ah); > > > + SET_DEVICE_OP(dev, modify_cq); > > > + SET_DEVICE_OP(dev, modify_device); > > > + SET_DEVICE_OP(dev, modify_flow_action_esp); > > > + SET_DEVICE_OP(dev, modify_port); > > > + SET_DEVICE_OP(dev, modify_qp); > > > + SET_DEVICE_OP(dev, modify_srq); > > > + SET_DEVICE_OP(dev, modify_wq); > > > + SET_DEVICE_OP(dev, peek_cq); > > > + SET_DEVICE_OP(dev, poll_cq); > > > + SET_DEVICE_OP(dev, post_recv); > > > + SET_DEVICE_OP(dev, post_send); > > > + SET_DEVICE_OP(dev, post_srq_recv); > > > + SET_DEVICE_OP(dev, process_mad); > > > + SET_DEVICE_OP(dev, query_ah); > > > + SET_DEVICE_OP(dev, query_device); > > > + SET_DEVICE_OP(dev, query_gid); > > > + SET_DEVICE_OP(dev, query_pkey); > > > + SET_DEVICE_OP(dev, query_port); > > > + SET_DEVICE_OP(dev, query_qp); > > > + SET_DEVICE_OP(dev, query_srq); > > > + SET_DEVICE_OP(dev, rdma_netdev_get_params); > > > + SET_DEVICE_OP(dev, read_counters); > > > + SET_DEVICE_OP(dev, reg_dm_mr); > > > + SET_DEVICE_OP(dev, reg_user_mr); > > > + SET_DEVICE_OP(dev, req_ncomp_notif); > > > + SET_DEVICE_OP(dev, req_notify_cq); > > > + SET_DEVICE_OP(dev, rereg_user_mr); > > > + SET_DEVICE_OP(dev, resize_cq); > > > + SET_DEVICE_OP(dev, set_vf_guid); > > > + SET_DEVICE_OP(dev, set_vf_link_state); > > > + SET_DEVICE_OP(dev, unmap_fmr); > > > +} > > > +EXPORT_SYMBOL(ib_set_device_ops); > > > + > > > static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { > > > [RDMA_NL_LS_OP_RESOLVE] = { > > > .doit = ib_nl_handle_resolve_resp, > > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > > > index 85021451eee0..3a02c72e7620 100644 > > > --- a/include/rdma/ib_verbs.h > > > +++ b/include/rdma/ib_verbs.h > > > @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr { > > > > > > struct uverbs_attr_bundle; > > > > > > +/** > > > + * struct ib_device_ops - InfiniBand device operations > > > + * This structure defines all the InfiniBand device operations, providers will > > > + * need to define the supported operations, otherwise they will be set to null. > > > + */ > > > +struct ib_device_ops { > > > + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, > > > + const struct ib_send_wr **bad_send_wr); > > > + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, > > > + const struct ib_recv_wr **bad_recv_wr); > > > + void (*drain_rq)(struct ib_qp *qp); > > > + void (*drain_sq)(struct ib_qp *qp); > > > + int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); > > > + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); > > > + int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); > > > + int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); > > > + int (*post_srq_recv)(struct ib_srq *srq, > > > + const struct ib_recv_wr *recv_wr, > > > + const struct ib_recv_wr **bad_recv_wr); > > > + int (*process_mad)(struct ib_device *device, int process_mad_flags, > > > + u8 port_num, const struct ib_wc *in_wc, > > > + const struct ib_grh *in_grh, > > > + const struct ib_mad_hdr *in_mad, size_t in_mad_size, > > > + struct ib_mad_hdr *out_mad, size_t *out_mad_size, > > > + u16 *out_mad_pkey_index); > > > + int (*query_device)(struct ib_device *device, > > > + struct ib_device_attr *device_attr, > > > + struct ib_udata *udata); > > > + int (*modify_device)(struct ib_device *device, int device_modify_mask, > > > + struct ib_device_modify *device_modify); > > > + void (*get_dev_fw_str)(struct ib_device *, char *str); > > > + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, > > > + int comp_vector); > > > + int (*query_port)(struct ib_device *device, u8 port_num, > > > + struct ib_port_attr *port_attr); > > > + int (*modify_port)(struct ib_device *device, u8 port_num, > > > + int port_modify_mask, > > > + struct ib_port_modify *port_modify); > > > + /** > > > + * The following mandatory functions are used only at device > > > + * registration. Keep functions such as these at the end of this > > > + * structure to avoid cache line misses when accessing struct ib_device > > > + * in fast paths. > > > + */ > > > + int (*get_port_immutable)(struct ib_device *, u8, > > > + struct ib_port_immutable *); > > > + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, > > > + u8 port_num); > > > + /** > > > + * When calling get_netdev, the HW vendor's driver should return the > > > + * net device of device @device at port @port_num or NULL if such > > > + * a net device doesn't exist. The vendor driver should call dev_hold > > > + * on this net device. The HW vendor's device driver must guarantee > > > + * that this function returns NULL before the net device has finished > > > + * NETDEV_UNREGISTER state. > > > + */ > > > + struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); > > > + /** > > > + * rdma netdev operation > > > + * > > > + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params > > > + * must return -EOPNOTSUPP if it doesn't support the specified type. > > > + */ > > > + struct net_device *(*alloc_rdma_netdev)( > > > + struct ib_device *device, > > > + u8 port_num, > > > + enum rdma_netdev_t type, > > > + const char *name, > > > + unsigned char name_assign_type, > > > + void (*setup)(struct net_device *)); > > > + > > > + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, > > > + enum rdma_netdev_t type, > > > + struct rdma_netdev_alloc_params *params); > > > + /** > > > + * query_gid should be return GID value for @device, when @port_num > > > + * link layer is either IB or iWarp. It is no-op if @port_num port > > > + * is RoCE link layer. > > > + */ > > > + int (*query_gid)(struct ib_device *device, u8 port_num, int index, > > > + union ib_gid *gid); > > > + /** > > > + * When calling add_gid, the HW vendor's driver should add the gid > > > + * of device of port at gid index available at @attr. Meta-info of > > > + * that gid (for example, the network device related to this gid) is > > > + * available at @attr. @context allows the HW vendor driver to store > > > + * extra information together with a GID entry. The HW vendor driver may > > > + * allocate memory to contain this information and store it in @context > > > + * when a new GID entry is written to. Params are consistent until the > > > + * next call of add_gid or delete_gid. The function should return 0 on > > > + * success or error otherwise. The function could be called > > > + * concurrently for different ports. This function is only called when > > > + * roce_gid_table is used. > > > + */ > > > + int (*add_gid)(const struct ib_gid_attr *attr, void **context); > > > + /** > > > + * When calling del_gid, the HW vendor's driver should delete the > > > + * gid of device @device at gid index gid_index of port port_num > > > + * available in @attr. > > > + * Upon the deletion of a GID entry, the HW vendor must free any > > > + * allocated memory. The caller will clear @context afterwards. > > > + * This function is only called when roce_gid_table is used. > > > + */ > > > + int (*del_gid)(const struct ib_gid_attr *attr, void **context); > > > + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, > > > + u16 *pkey); > > > + struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, > > > + struct ib_udata *udata); > > > + int (*dealloc_ucontext)(struct ib_ucontext *context); > > > + int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); > > > + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); > > > + struct ib_pd *(*alloc_pd)(struct ib_device *device, > > > + struct ib_ucontext *context, > > > + struct ib_udata *udata); > > > + int (*dealloc_pd)(struct ib_pd *pd); > > > + struct ib_ah *(*create_ah)(struct ib_pd *pd, > > > + struct rdma_ah_attr *ah_attr, > > > + struct ib_udata *udata); > > > + int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > > > + int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); > > > + int (*destroy_ah)(struct ib_ah *ah); > > > + struct ib_srq *(*create_srq)(struct ib_pd *pd, > > > + struct ib_srq_init_attr *srq_init_attr, > > > + struct ib_udata *udata); > > > + int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, > > > + enum ib_srq_attr_mask srq_attr_mask, > > > + struct ib_udata *udata); > > > + int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); > > > + int (*destroy_srq)(struct ib_srq *srq); > > > + struct ib_qp *(*create_qp)(struct ib_pd *pd, > > > + struct ib_qp_init_attr *qp_init_attr, > > > + struct ib_udata *udata); > > > + int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > > > + int qp_attr_mask, struct ib_udata *udata); > > > + int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, > > > + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); > > > + int (*destroy_qp)(struct ib_qp *qp); > > > + struct ib_cq *(*create_cq)(struct ib_device *device, > > > + const struct ib_cq_init_attr *attr, > > > + struct ib_ucontext *context, > > > + struct ib_udata *udata); > > > + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); > > > + int (*destroy_cq)(struct ib_cq *cq); > > > + int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); > > > + struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); > > > + struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, > > > + u64 virt_addr, int mr_access_flags, > > > + struct ib_udata *udata); > > > + int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, > > > + u64 virt_addr, int mr_access_flags, > > > + struct ib_pd *pd, struct ib_udata *udata); > > > + int (*dereg_mr)(struct ib_mr *mr); > > > + struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, > > > + u32 max_num_sg); > > > + int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, > > > + unsigned int *sg_offset); > > > + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, > > > + struct ib_mr_status *mr_status); > > > + struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, > > > + struct ib_udata *udata); > > > + int (*dealloc_mw)(struct ib_mw *mw); > > > + struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, > > > + struct ib_fmr_attr *fmr_attr); > > > + int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, > > > + u64 iova); > > > + int (*unmap_fmr)(struct list_head *fmr_list); > > > + int (*dealloc_fmr)(struct ib_fmr *fmr); > > > + int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > > > + int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); > > > + struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, > > > + struct ib_ucontext *ucontext, > > > + struct ib_udata *udata); > > > + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); > > > + struct ib_flow *(*create_flow)(struct ib_qp *qp, > > > + struct ib_flow_attr *flow_attr, > > > + int domain, struct ib_udata *udata); > > > + int (*destroy_flow)(struct ib_flow *flow_id); > > > + struct ib_flow_action *(*create_flow_action_esp)( > > > + struct ib_device *device, > > > + const struct ib_flow_action_attrs_esp *attr, > > > + struct uverbs_attr_bundle *attrs); > > > + int (*destroy_flow_action)(struct ib_flow_action *action); > > > + int (*modify_flow_action_esp)( > > > + struct ib_flow_action *action, > > > + const struct ib_flow_action_attrs_esp *attr, > > > + struct uverbs_attr_bundle *attrs); > > > + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, > > > + int state); > > > + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, > > > + struct ifla_vf_info *ivf); > > > + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, > > > + struct ifla_vf_stats *stats); > > > + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, > > > + int type); > > > + struct ib_wq *(*create_wq)(struct ib_pd *pd, > > > + struct ib_wq_init_attr *init_attr, > > > + struct ib_udata *udata); > > > + int (*destroy_wq)(struct ib_wq *wq); > > > + int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, > > > + u32 wq_attr_mask, struct ib_udata *udata); > > > + struct ib_rwq_ind_table *(*create_rwq_ind_table)( > > > + struct ib_device *device, > > > + struct ib_rwq_ind_table_init_attr *init_attr, > > > + struct ib_udata *udata); > > > + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); > > > + struct ib_dm *(*alloc_dm)(struct ib_device *device, > > > + struct ib_ucontext *context, > > > + struct ib_dm_alloc_attr *attr, > > > + struct uverbs_attr_bundle *attrs); > > > + int (*dealloc_dm)(struct ib_dm *dm); > > > + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, > > > + struct ib_dm_mr_attr *attr, > > > + struct uverbs_attr_bundle *attrs); > > > + struct ib_counters *(*create_counters)( > > > + struct ib_device *device, struct uverbs_attr_bundle *attrs); > > > + int (*destroy_counters)(struct ib_counters *counters); > > > + int (*read_counters)(struct ib_counters *counters, > > > + struct ib_counters_read_attr *counters_read_attr, > > > + struct uverbs_attr_bundle *attrs); > > > + /** > > > + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the > > > + * driver initialized data. The struct is kfree()'ed by the sysfs > > > + * core when the device is removed. A lifespan of -1 in the return > > > + * struct tells the core to set a default lifespan. > > > + */ > > > + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, > > > + u8 port_num); > > > + /** > > > + * get_hw_stats - Fill in the counter value(s) in the stats struct. > > > + * @index - The index in the value array we wish to have updated, or > > > + * num_counters if we want all stats updated > > > + * Return codes - > > > + * < 0 - Error, no counters updated > > > + * index - Updated the single counter pointed to by index > > > + * num_counters - Updated all counters (will reset the timestamp > > > + * and prevent further calls for lifespan milliseconds) > > > + * Drivers are allowed to update all counters in leiu of just the > > > + * one given in index at their option > > > + */ > > > + int (*get_hw_stats)(struct ib_device *device, > > > + struct rdma_hw_stats *stats, u8 port, int index); > > > +}; > > > + > > > > So now we have each function declared both in ib_device and in > > ib_device_ops? > > No, the last patch in the patch set removes the functions within the ib_device. > > > > > Will it be easier to remove all declarations from ib_device and add new > > member struct ib_device_ops *ops to ib_device this way objective will be > > achieved (code more readable) while still preserving single place of > > declaration? > > Please take a look on the last patch in this patch set, it removes the functions > within the ib_device and add the ib_device_ops as a member to the ib_device, > this was done to make it easier to review this huge patch set. Ok, saw it, so wondering why two separate patches where second undo stuff made by first. I guess it was easier to maintain it this way, suggesting, just in case it is not such a trouble to merge them as it might look funny on git-bisect when it fails on this one :) Anyways, is it possible somehow to include the setting of uverbs_cmd_mask? > > > > > > struct ib_device { > > > /* Do not access @dma_device directly from ULP nor from HW drivers. */ > > > struct device *dma_device; > > > @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client); > > > void *ib_get_client_data(struct ib_device *device, struct ib_client *client); > > > void ib_set_client_data(struct ib_device *device, struct ib_client *client, > > > void *data); > > > +void ib_set_device_ops(struct ib_device *device, > > > + const struct ib_device_ops *ops); > > > > > > #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) > > > int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, > > > -- > > > 2.19.2 > > >
On Tue, Dec 11, 2018 at 11:37:30AM +0200, Yuval Shaia wrote: > > > Will it be easier to remove all declarations from ib_device and add new > > > member struct ib_device_ops *ops to ib_device this way objective will be > > > achieved (code more readable) while still preserving single place of > > > declaration? > > > > Please take a look on the last patch in this patch set, it removes the functions > > within the ib_device and add the ib_device_ops as a member to the ib_device, > > this was done to make it easier to review this huge patch set. > > Ok, saw it, so wondering why two separate patches where second undo stuff > made by first. This arrangment has lower LOC changes > Anyways, is it possible somehow to include the setting of uverbs_cmd_mask? I want to delete this thing, I think we can now, just needs some driver auditing (maybe Kamal will do this next :)) Jason
On Tue, Dec 11, 2018 at 09:10:46AM -0700, Jason Gunthorpe wrote: > On Tue, Dec 11, 2018 at 11:37:30AM +0200, Yuval Shaia wrote: > > > > Will it be easier to remove all declarations from ib_device and add new > > > > member struct ib_device_ops *ops to ib_device this way objective will be > > > > achieved (code more readable) while still preserving single place of > > > > declaration? > > > > > > Please take a look on the last patch in this patch set, it removes the functions > > > within the ib_device and add the ib_device_ops as a member to the ib_device, > > > this was done to make it easier to review this huge patch set. > > > > Ok, saw it, so wondering why two separate patches where second undo stuff > > made by first. > > This arrangment has lower LOC changes > > > Anyways, is it possible somehow to include the setting of uverbs_cmd_mask? > > I want to delete this thing, I think we can now, just needs some > driver auditing (maybe Kamal will do this next :)) > Maybe :-) > Jason
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0027b0d79b09..3589894c46b8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) +{ +#define SET_DEVICE_OP(ptr, name) \ + do { \ + if (ops->name) \ + if (!((ptr)->name)) \ + (ptr)->name = ops->name; \ + } while (0) + + SET_DEVICE_OP(dev, add_gid); + SET_DEVICE_OP(dev, alloc_dm); + SET_DEVICE_OP(dev, alloc_fmr); + SET_DEVICE_OP(dev, alloc_hw_stats); + SET_DEVICE_OP(dev, alloc_mr); + SET_DEVICE_OP(dev, alloc_mw); + SET_DEVICE_OP(dev, alloc_pd); + SET_DEVICE_OP(dev, alloc_rdma_netdev); + SET_DEVICE_OP(dev, alloc_ucontext); + SET_DEVICE_OP(dev, alloc_xrcd); + SET_DEVICE_OP(dev, attach_mcast); + SET_DEVICE_OP(dev, check_mr_status); + SET_DEVICE_OP(dev, create_ah); + SET_DEVICE_OP(dev, create_counters); + SET_DEVICE_OP(dev, create_cq); + SET_DEVICE_OP(dev, create_flow); + SET_DEVICE_OP(dev, create_flow_action_esp); + SET_DEVICE_OP(dev, create_qp); + SET_DEVICE_OP(dev, create_rwq_ind_table); + SET_DEVICE_OP(dev, create_srq); + SET_DEVICE_OP(dev, create_wq); + SET_DEVICE_OP(dev, dealloc_dm); + SET_DEVICE_OP(dev, dealloc_fmr); + SET_DEVICE_OP(dev, dealloc_mw); + SET_DEVICE_OP(dev, dealloc_pd); + SET_DEVICE_OP(dev, dealloc_ucontext); + SET_DEVICE_OP(dev, dealloc_xrcd); + SET_DEVICE_OP(dev, del_gid); + SET_DEVICE_OP(dev, dereg_mr); + SET_DEVICE_OP(dev, destroy_ah); + SET_DEVICE_OP(dev, destroy_counters); + SET_DEVICE_OP(dev, destroy_cq); + SET_DEVICE_OP(dev, destroy_flow); + SET_DEVICE_OP(dev, destroy_flow_action); + SET_DEVICE_OP(dev, destroy_qp); + SET_DEVICE_OP(dev, destroy_rwq_ind_table); + SET_DEVICE_OP(dev, destroy_srq); + SET_DEVICE_OP(dev, destroy_wq); + SET_DEVICE_OP(dev, detach_mcast); + SET_DEVICE_OP(dev, disassociate_ucontext); + SET_DEVICE_OP(dev, drain_rq); + SET_DEVICE_OP(dev, drain_sq); + SET_DEVICE_OP(dev, get_dev_fw_str); + SET_DEVICE_OP(dev, get_dma_mr); + SET_DEVICE_OP(dev, get_hw_stats); + SET_DEVICE_OP(dev, get_link_layer); + SET_DEVICE_OP(dev, get_netdev); + SET_DEVICE_OP(dev, get_port_immutable); + SET_DEVICE_OP(dev, get_vector_affinity); + SET_DEVICE_OP(dev, get_vf_config); + SET_DEVICE_OP(dev, get_vf_stats); + SET_DEVICE_OP(dev, map_mr_sg); + SET_DEVICE_OP(dev, map_phys_fmr); + SET_DEVICE_OP(dev, mmap); + SET_DEVICE_OP(dev, modify_ah); + SET_DEVICE_OP(dev, modify_cq); + SET_DEVICE_OP(dev, modify_device); + SET_DEVICE_OP(dev, modify_flow_action_esp); + SET_DEVICE_OP(dev, modify_port); + SET_DEVICE_OP(dev, modify_qp); + SET_DEVICE_OP(dev, modify_srq); + SET_DEVICE_OP(dev, modify_wq); + SET_DEVICE_OP(dev, peek_cq); + SET_DEVICE_OP(dev, poll_cq); + SET_DEVICE_OP(dev, post_recv); + SET_DEVICE_OP(dev, post_send); + SET_DEVICE_OP(dev, post_srq_recv); + SET_DEVICE_OP(dev, process_mad); + SET_DEVICE_OP(dev, query_ah); + SET_DEVICE_OP(dev, query_device); + SET_DEVICE_OP(dev, query_gid); + SET_DEVICE_OP(dev, query_pkey); + SET_DEVICE_OP(dev, query_port); + SET_DEVICE_OP(dev, query_qp); + SET_DEVICE_OP(dev, query_srq); + SET_DEVICE_OP(dev, rdma_netdev_get_params); + SET_DEVICE_OP(dev, read_counters); + SET_DEVICE_OP(dev, reg_dm_mr); + SET_DEVICE_OP(dev, reg_user_mr); + SET_DEVICE_OP(dev, req_ncomp_notif); + SET_DEVICE_OP(dev, req_notify_cq); + SET_DEVICE_OP(dev, rereg_user_mr); + SET_DEVICE_OP(dev, resize_cq); + SET_DEVICE_OP(dev, set_vf_guid); + SET_DEVICE_OP(dev, set_vf_link_state); + SET_DEVICE_OP(dev, unmap_fmr); +} +EXPORT_SYMBOL(ib_set_device_ops); + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 85021451eee0..3a02c72e7620 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr { struct uverbs_attr_bundle; +/** + * struct ib_device_ops - InfiniBand device operations + * This structure defines all the InfiniBand device operations, providers will + * need to define the supported operations, otherwise they will be set to null. + */ +struct ib_device_ops { + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); + int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); + int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); + int (*post_srq_recv)(struct ib_srq *srq, + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); + int (*process_mad)(struct ib_device *device, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, size_t in_mad_size, + struct ib_mad_hdr *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr, + struct ib_udata *udata); + int (*modify_device)(struct ib_device *device, int device_modify_mask, + struct ib_device_modify *device_modify); + void (*get_dev_fw_str)(struct ib_device *, char *str); + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, + int comp_vector); + int (*query_port)(struct ib_device *device, u8 port_num, + struct ib_port_attr *port_attr); + int (*modify_port)(struct ib_device *device, u8 port_num, + int port_modify_mask, + struct ib_port_modify *port_modify); + /** + * The following mandatory functions are used only at device + * registration. Keep functions such as these at the end of this + * structure to avoid cache line misses when accessing struct ib_device + * in fast paths. + */ + int (*get_port_immutable)(struct ib_device *, u8, + struct ib_port_immutable *); + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, + u8 port_num); + /** + * When calling get_netdev, the HW vendor's driver should return the + * net device of device @device at port @port_num or NULL if such + * a net device doesn't exist. The vendor driver should call dev_hold + * on this net device. The HW vendor's device driver must guarantee + * that this function returns NULL before the net device has finished + * NETDEV_UNREGISTER state. + */ + struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + /** + * rdma netdev operation + * + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params + * must return -EOPNOTSUPP if it doesn't support the specified type. + */ + struct net_device *(*alloc_rdma_netdev)( + struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); + + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params); + /** + * query_gid should be return GID value for @device, when @port_num + * link layer is either IB or iWarp. It is no-op if @port_num port + * is RoCE link layer. + */ + int (*query_gid)(struct ib_device *device, u8 port_num, int index, + union ib_gid *gid); + /** + * When calling add_gid, the HW vendor's driver should add the gid + * of device of port at gid index available at @attr. Meta-info of + * that gid (for example, the network device related to this gid) is + * available at @attr. @context allows the HW vendor driver to store + * extra information together with a GID entry. The HW vendor driver may + * allocate memory to contain this information and store it in @context + * when a new GID entry is written to. Params are consistent until the + * next call of add_gid or delete_gid. The function should return 0 on + * success or error otherwise. The function could be called + * concurrently for different ports. This function is only called when + * roce_gid_table is used. + */ + int (*add_gid)(const struct ib_gid_attr *attr, void **context); + /** + * When calling del_gid, the HW vendor's driver should delete the + * gid of device @device at gid index gid_index of port port_num + * available in @attr. + * Upon the deletion of a GID entry, the HW vendor must free any + * allocated memory. The caller will clear @context afterwards. + * This function is only called when roce_gid_table is used. + */ + int (*del_gid)(const struct ib_gid_attr *attr, void **context); + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, + u16 *pkey); + struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + struct ib_pd *(*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah *(*create_ah)(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); + int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq *(*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + struct ib_qp *(*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata); + int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + struct ib_cq *(*create_cq)(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); + struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); + struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); + int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_pd *pd, struct ib_udata *udata); + int (*dereg_mr)(struct ib_mr *mr); + struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); + int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); + struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); + int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); + struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, + struct ib_ucontext *ucontext, + struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + struct ib_flow *(*create_flow)(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain, struct ib_udata *udata); + int (*destroy_flow)(struct ib_flow *flow_id); + struct ib_flow_action *(*create_flow_action_esp)( + struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*destroy_flow_action)(struct ib_flow_action *action); + int (*modify_flow_action_esp)( + struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int state); + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *ivf); + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int type); + struct ib_wq *(*create_wq)(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_wq)(struct ib_wq *wq); + int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, + u32 wq_attr_mask, struct ib_udata *udata); + struct ib_rwq_ind_table *(*create_rwq_ind_table)( + struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); + struct ib_dm *(*alloc_dm)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); + int (*dealloc_dm)(struct ib_dm *dm); + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); + struct ib_counters *(*create_counters)( + struct ib_device *device, struct uverbs_attr_bundle *attrs); + int (*destroy_counters)(struct ib_counters *counters); + int (*read_counters)(struct ib_counters *counters, + struct ib_counters_read_attr *counters_read_attr, + struct uverbs_attr_bundle *attrs); + /** + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the + * driver initialized data. The struct is kfree()'ed by the sysfs + * core when the device is removed. A lifespan of -1 in the return + * struct tells the core to set a default lifespan. + */ + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, + u8 port_num); + /** + * get_hw_stats - Fill in the counter value(s) in the stats struct. + * @index - The index in the value array we wish to have updated, or + * num_counters if we want all stats updated + * Return codes - + * < 0 - Error, no counters updated + * index - Updated the single counter pointed to by index + * num_counters - Updated all counters (will reset the timestamp + * and prevent further calls for lifespan milliseconds) + * Drivers are allowed to update all counters in leiu of just the + * one given in index at their option + */ + int (*get_hw_stats)(struct ib_device *device, + struct rdma_hw_stats *stats, u8 port, int index); +}; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client); void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +void ib_set_device_ops(struct ib_device *device, + const struct ib_device_ops *ops); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
This change introduce the ib_device_ops structure that defines all the InfiniBand device operations in one place, so the code will be more readable and clean, unlike today that the ops are mixed with ib_device data members. The providers will need to define the supported operations and assign them using ib_device_ops(), that will also make the providers code more readable and clean. Signed-off-by: Kamal Heib <kamalheib1@gmail.com> --- drivers/infiniband/core/device.c | 98 +++++++++++++ include/rdma/ib_verbs.h | 245 +++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+)