diff mbox series

[rdma-next,v5,02/20] RDMA/core: Introduce ib_device_ops

Message ID 20181210190948.6892-3-kamalheib1@gmail.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series RDMA: Add support for ib_device_ops | expand

Commit Message

Kamal Heib Dec. 10, 2018, 7:09 p.m. UTC
This change introduces the ib_device_ops structure that defines all the
InfiniBand device operations in one place, so the code will be more
readable and clean, unlike today that the ops are mixed with ib_device
data members.

The providers will need to define the supported operations and assign
them using ib_device_ops(), that will also make the providers code more
readable and clean.

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 drivers/infiniband/core/device.c |  98 +++++++++++++
 include/rdma/ib_verbs.h          | 245 +++++++++++++++++++++++++++++++
 2 files changed, 343 insertions(+)

Comments

Yuval Shaia Dec. 10, 2018, 8:46 p.m. UTC | #1
On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote:
> This change introduce the ib_device_ops structure that defines all the
> InfiniBand device operations in one place, so the code will be more
> readable and clean, unlike today that the ops are mixed with ib_device
> data members.
> 
> The providers will need to define the supported operations and assign
> them using ib_device_ops(), that will also make the providers code more

You mean ib_set_device_ops, right?

> readable and clean.
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  drivers/infiniband/core/device.c |  98 +++++++++++++
>  include/rdma/ib_verbs.h          | 245 +++++++++++++++++++++++++++++++
>  2 files changed, 343 insertions(+)
> 
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 0027b0d79b09..3589894c46b8 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
>  }
>  EXPORT_SYMBOL(ib_get_net_dev_by_params);
>  
> +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> +{
> +#define SET_DEVICE_OP(ptr, name)                                               \
> +	do {                                                                   \
> +		if (ops->name)                                                 \
> +			if (!((ptr)->name))				       \
> +				(ptr)->name = ops->name;                       \
> +	} while (0)
> +
> +	SET_DEVICE_OP(dev, add_gid);
> +	SET_DEVICE_OP(dev, alloc_dm);
> +	SET_DEVICE_OP(dev, alloc_fmr);
> +	SET_DEVICE_OP(dev, alloc_hw_stats);
> +	SET_DEVICE_OP(dev, alloc_mr);
> +	SET_DEVICE_OP(dev, alloc_mw);
> +	SET_DEVICE_OP(dev, alloc_pd);
> +	SET_DEVICE_OP(dev, alloc_rdma_netdev);
> +	SET_DEVICE_OP(dev, alloc_ucontext);
> +	SET_DEVICE_OP(dev, alloc_xrcd);
> +	SET_DEVICE_OP(dev, attach_mcast);
> +	SET_DEVICE_OP(dev, check_mr_status);
> +	SET_DEVICE_OP(dev, create_ah);
> +	SET_DEVICE_OP(dev, create_counters);
> +	SET_DEVICE_OP(dev, create_cq);
> +	SET_DEVICE_OP(dev, create_flow);
> +	SET_DEVICE_OP(dev, create_flow_action_esp);
> +	SET_DEVICE_OP(dev, create_qp);
> +	SET_DEVICE_OP(dev, create_rwq_ind_table);
> +	SET_DEVICE_OP(dev, create_srq);
> +	SET_DEVICE_OP(dev, create_wq);
> +	SET_DEVICE_OP(dev, dealloc_dm);
> +	SET_DEVICE_OP(dev, dealloc_fmr);
> +	SET_DEVICE_OP(dev, dealloc_mw);
> +	SET_DEVICE_OP(dev, dealloc_pd);
> +	SET_DEVICE_OP(dev, dealloc_ucontext);
> +	SET_DEVICE_OP(dev, dealloc_xrcd);
> +	SET_DEVICE_OP(dev, del_gid);
> +	SET_DEVICE_OP(dev, dereg_mr);
> +	SET_DEVICE_OP(dev, destroy_ah);
> +	SET_DEVICE_OP(dev, destroy_counters);
> +	SET_DEVICE_OP(dev, destroy_cq);
> +	SET_DEVICE_OP(dev, destroy_flow);
> +	SET_DEVICE_OP(dev, destroy_flow_action);
> +	SET_DEVICE_OP(dev, destroy_qp);
> +	SET_DEVICE_OP(dev, destroy_rwq_ind_table);
> +	SET_DEVICE_OP(dev, destroy_srq);
> +	SET_DEVICE_OP(dev, destroy_wq);
> +	SET_DEVICE_OP(dev, detach_mcast);
> +	SET_DEVICE_OP(dev, disassociate_ucontext);
> +	SET_DEVICE_OP(dev, drain_rq);
> +	SET_DEVICE_OP(dev, drain_sq);
> +	SET_DEVICE_OP(dev, get_dev_fw_str);
> +	SET_DEVICE_OP(dev, get_dma_mr);
> +	SET_DEVICE_OP(dev, get_hw_stats);
> +	SET_DEVICE_OP(dev, get_link_layer);
> +	SET_DEVICE_OP(dev, get_netdev);
> +	SET_DEVICE_OP(dev, get_port_immutable);
> +	SET_DEVICE_OP(dev, get_vector_affinity);
> +	SET_DEVICE_OP(dev, get_vf_config);
> +	SET_DEVICE_OP(dev, get_vf_stats);
> +	SET_DEVICE_OP(dev, map_mr_sg);
> +	SET_DEVICE_OP(dev, map_phys_fmr);
> +	SET_DEVICE_OP(dev, mmap);
> +	SET_DEVICE_OP(dev, modify_ah);
> +	SET_DEVICE_OP(dev, modify_cq);
> +	SET_DEVICE_OP(dev, modify_device);
> +	SET_DEVICE_OP(dev, modify_flow_action_esp);
> +	SET_DEVICE_OP(dev, modify_port);
> +	SET_DEVICE_OP(dev, modify_qp);
> +	SET_DEVICE_OP(dev, modify_srq);
> +	SET_DEVICE_OP(dev, modify_wq);
> +	SET_DEVICE_OP(dev, peek_cq);
> +	SET_DEVICE_OP(dev, poll_cq);
> +	SET_DEVICE_OP(dev, post_recv);
> +	SET_DEVICE_OP(dev, post_send);
> +	SET_DEVICE_OP(dev, post_srq_recv);
> +	SET_DEVICE_OP(dev, process_mad);
> +	SET_DEVICE_OP(dev, query_ah);
> +	SET_DEVICE_OP(dev, query_device);
> +	SET_DEVICE_OP(dev, query_gid);
> +	SET_DEVICE_OP(dev, query_pkey);
> +	SET_DEVICE_OP(dev, query_port);
> +	SET_DEVICE_OP(dev, query_qp);
> +	SET_DEVICE_OP(dev, query_srq);
> +	SET_DEVICE_OP(dev, rdma_netdev_get_params);
> +	SET_DEVICE_OP(dev, read_counters);
> +	SET_DEVICE_OP(dev, reg_dm_mr);
> +	SET_DEVICE_OP(dev, reg_user_mr);
> +	SET_DEVICE_OP(dev, req_ncomp_notif);
> +	SET_DEVICE_OP(dev, req_notify_cq);
> +	SET_DEVICE_OP(dev, rereg_user_mr);
> +	SET_DEVICE_OP(dev, resize_cq);
> +	SET_DEVICE_OP(dev, set_vf_guid);
> +	SET_DEVICE_OP(dev, set_vf_link_state);
> +	SET_DEVICE_OP(dev, unmap_fmr);
> +}
> +EXPORT_SYMBOL(ib_set_device_ops);
> +
>  static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
>  	[RDMA_NL_LS_OP_RESOLVE] = {
>  		.doit = ib_nl_handle_resolve_resp,
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 85021451eee0..3a02c72e7620 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr {
>  
>  struct uverbs_attr_bundle;
>  
> +/**
> + * struct ib_device_ops - InfiniBand device operations
> + * This structure defines all the InfiniBand device operations, providers will
> + * need to define the supported operations, otherwise they will be set to null.
> + */
> +struct ib_device_ops {
> +	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
> +			 const struct ib_send_wr **bad_send_wr);
> +	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> +			 const struct ib_recv_wr **bad_recv_wr);
> +	void (*drain_rq)(struct ib_qp *qp);
> +	void (*drain_sq)(struct ib_qp *qp);
> +	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
> +	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
> +	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
> +	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
> +	int (*post_srq_recv)(struct ib_srq *srq,
> +			     const struct ib_recv_wr *recv_wr,
> +			     const struct ib_recv_wr **bad_recv_wr);
> +	int (*process_mad)(struct ib_device *device, int process_mad_flags,
> +			   u8 port_num, const struct ib_wc *in_wc,
> +			   const struct ib_grh *in_grh,
> +			   const struct ib_mad_hdr *in_mad, size_t in_mad_size,
> +			   struct ib_mad_hdr *out_mad, size_t *out_mad_size,
> +			   u16 *out_mad_pkey_index);
> +	int (*query_device)(struct ib_device *device,
> +			    struct ib_device_attr *device_attr,
> +			    struct ib_udata *udata);
> +	int (*modify_device)(struct ib_device *device, int device_modify_mask,
> +			     struct ib_device_modify *device_modify);
> +	void (*get_dev_fw_str)(struct ib_device *, char *str);
> +	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
> +						     int comp_vector);
> +	int (*query_port)(struct ib_device *device, u8 port_num,
> +			  struct ib_port_attr *port_attr);
> +	int (*modify_port)(struct ib_device *device, u8 port_num,
> +			   int port_modify_mask,
> +			   struct ib_port_modify *port_modify);
> +	/**
> +	 * The following mandatory functions are used only at device
> +	 * registration.  Keep functions such as these at the end of this
> +	 * structure to avoid cache line misses when accessing struct ib_device
> +	 * in fast paths.
> +	 */
> +	int (*get_port_immutable)(struct ib_device *, u8,
> +				  struct ib_port_immutable *);
> +	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
> +					       u8 port_num);
> +	/**
> +	 * When calling get_netdev, the HW vendor's driver should return the
> +	 * net device of device @device at port @port_num or NULL if such
> +	 * a net device doesn't exist. The vendor driver should call dev_hold
> +	 * on this net device. The HW vendor's device driver must guarantee
> +	 * that this function returns NULL before the net device has finished
> +	 * NETDEV_UNREGISTER state.
> +	 */
> +	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
> +	/**
> +	 * rdma netdev operation
> +	 *
> +	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
> +	 * must return -EOPNOTSUPP if it doesn't support the specified type.
> +	 */
> +	struct net_device *(*alloc_rdma_netdev)(
> +					struct ib_device *device,
> +					u8 port_num,
> +					enum rdma_netdev_t type,
> +					const char *name,
> +					unsigned char name_assign_type,
> +					void (*setup)(struct net_device *));
> +
> +	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
> +				      enum rdma_netdev_t type,
> +				      struct rdma_netdev_alloc_params *params);
> +	/**
> +	 * query_gid should be return GID value for @device, when @port_num
> +	 * link layer is either IB or iWarp. It is no-op if @port_num port
> +	 * is RoCE link layer.
> +	 */
> +	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
> +			 union ib_gid *gid);
> +	/**
> +	 * When calling add_gid, the HW vendor's driver should add the gid
> +	 * of device of port at gid index available at @attr. Meta-info of
> +	 * that gid (for example, the network device related to this gid) is
> +	 * available at @attr. @context allows the HW vendor driver to store
> +	 * extra information together with a GID entry. The HW vendor driver may
> +	 * allocate memory to contain this information and store it in @context
> +	 * when a new GID entry is written to. Params are consistent until the
> +	 * next call of add_gid or delete_gid. The function should return 0 on
> +	 * success or error otherwise. The function could be called
> +	 * concurrently for different ports. This function is only called when
> +	 * roce_gid_table is used.
> +	 */
> +	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
> +	/**
> +	 * When calling del_gid, the HW vendor's driver should delete the
> +	 * gid of device @device at gid index gid_index of port port_num
> +	 * available in @attr.
> +	 * Upon the deletion of a GID entry, the HW vendor must free any
> +	 * allocated memory. The caller will clear @context afterwards.
> +	 * This function is only called when roce_gid_table is used.
> +	 */
> +	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
> +	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
> +			  u16 *pkey);
> +	struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
> +					      struct ib_udata *udata);
> +	int (*dealloc_ucontext)(struct ib_ucontext *context);
> +	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> +	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> +	struct ib_pd *(*alloc_pd)(struct ib_device *device,
> +				  struct ib_ucontext *context,
> +				  struct ib_udata *udata);
> +	int (*dealloc_pd)(struct ib_pd *pd);
> +	struct ib_ah *(*create_ah)(struct ib_pd *pd,
> +				   struct rdma_ah_attr *ah_attr,
> +				   struct ib_udata *udata);
> +	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> +	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> +	int (*destroy_ah)(struct ib_ah *ah);
> +	struct ib_srq *(*create_srq)(struct ib_pd *pd,
> +				     struct ib_srq_init_attr *srq_init_attr,
> +				     struct ib_udata *udata);
> +	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
> +			  enum ib_srq_attr_mask srq_attr_mask,
> +			  struct ib_udata *udata);
> +	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
> +	int (*destroy_srq)(struct ib_srq *srq);
> +	struct ib_qp *(*create_qp)(struct ib_pd *pd,
> +				   struct ib_qp_init_attr *qp_init_attr,
> +				   struct ib_udata *udata);
> +	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> +			 int qp_attr_mask, struct ib_udata *udata);
> +	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> +			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
> +	int (*destroy_qp)(struct ib_qp *qp);
> +	struct ib_cq *(*create_cq)(struct ib_device *device,
> +				   const struct ib_cq_init_attr *attr,
> +				   struct ib_ucontext *context,
> +				   struct ib_udata *udata);
> +	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
> +	int (*destroy_cq)(struct ib_cq *cq);
> +	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
> +	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
> +	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
> +				     u64 virt_addr, int mr_access_flags,
> +				     struct ib_udata *udata);
> +	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
> +			     u64 virt_addr, int mr_access_flags,
> +			     struct ib_pd *pd, struct ib_udata *udata);
> +	int (*dereg_mr)(struct ib_mr *mr);
> +	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
> +				  u32 max_num_sg);
> +	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
> +			 unsigned int *sg_offset);
> +	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
> +			       struct ib_mr_status *mr_status);
> +	struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
> +				  struct ib_udata *udata);
> +	int (*dealloc_mw)(struct ib_mw *mw);
> +	struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
> +				    struct ib_fmr_attr *fmr_attr);
> +	int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
> +			    u64 iova);
> +	int (*unmap_fmr)(struct list_head *fmr_list);
> +	int (*dealloc_fmr)(struct ib_fmr *fmr);
> +	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> +	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> +	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
> +				      struct ib_ucontext *ucontext,
> +				      struct ib_udata *udata);
> +	int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
> +	struct ib_flow *(*create_flow)(struct ib_qp *qp,
> +				       struct ib_flow_attr *flow_attr,
> +				       int domain, struct ib_udata *udata);
> +	int (*destroy_flow)(struct ib_flow *flow_id);
> +	struct ib_flow_action *(*create_flow_action_esp)(
> +		struct ib_device *device,
> +		const struct ib_flow_action_attrs_esp *attr,
> +		struct uverbs_attr_bundle *attrs);
> +	int (*destroy_flow_action)(struct ib_flow_action *action);
> +	int (*modify_flow_action_esp)(
> +		struct ib_flow_action *action,
> +		const struct ib_flow_action_attrs_esp *attr,
> +		struct uverbs_attr_bundle *attrs);
> +	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
> +				 int state);
> +	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
> +			     struct ifla_vf_info *ivf);
> +	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
> +			    struct ifla_vf_stats *stats);
> +	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
> +			   int type);
> +	struct ib_wq *(*create_wq)(struct ib_pd *pd,
> +				   struct ib_wq_init_attr *init_attr,
> +				   struct ib_udata *udata);
> +	int (*destroy_wq)(struct ib_wq *wq);
> +	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
> +			 u32 wq_attr_mask, struct ib_udata *udata);
> +	struct ib_rwq_ind_table *(*create_rwq_ind_table)(
> +		struct ib_device *device,
> +		struct ib_rwq_ind_table_init_attr *init_attr,
> +		struct ib_udata *udata);
> +	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
> +	struct ib_dm *(*alloc_dm)(struct ib_device *device,
> +				  struct ib_ucontext *context,
> +				  struct ib_dm_alloc_attr *attr,
> +				  struct uverbs_attr_bundle *attrs);
> +	int (*dealloc_dm)(struct ib_dm *dm);
> +	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
> +				   struct ib_dm_mr_attr *attr,
> +				   struct uverbs_attr_bundle *attrs);
> +	struct ib_counters *(*create_counters)(
> +		struct ib_device *device, struct uverbs_attr_bundle *attrs);
> +	int (*destroy_counters)(struct ib_counters *counters);
> +	int (*read_counters)(struct ib_counters *counters,
> +			     struct ib_counters_read_attr *counters_read_attr,
> +			     struct uverbs_attr_bundle *attrs);
> +	/**
> +	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
> +	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
> +	 *   core when the device is removed.  A lifespan of -1 in the return
> +	 *   struct tells the core to set a default lifespan.
> +	 */
> +	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
> +						u8 port_num);
> +	/**
> +	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
> +	 * @index - The index in the value array we wish to have updated, or
> +	 *   num_counters if we want all stats updated
> +	 * Return codes -
> +	 *   < 0 - Error, no counters updated
> +	 *   index - Updated the single counter pointed to by index
> +	 *   num_counters - Updated all counters (will reset the timestamp
> +	 *     and prevent further calls for lifespan milliseconds)
> +	 * Drivers are allowed to update all counters in leiu of just the
> +	 *   one given in index at their option
> +	 */
> +	int (*get_hw_stats)(struct ib_device *device,
> +			    struct rdma_hw_stats *stats, u8 port, int index);
> +};
> +

So now we have each function declared both in ib_device and in
ib_device_ops?

Will it be easier to remove all declarations from ib_device and add new
member struct ib_device_ops *ops to ib_device this way objective will be
achieved (code more readable) while still preserving single place of
declaration?

>  struct ib_device {
>  	/* Do not access @dma_device directly from ULP nor from HW drivers. */
>  	struct device                *dma_device;
> @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client);
>  void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
>  void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
>  			 void *data);
> +void ib_set_device_ops(struct ib_device *device,
> +		       const struct ib_device_ops *ops);
>  
>  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
>  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> -- 
> 2.19.2
>
Kamal Heib Dec. 10, 2018, 9:14 p.m. UTC | #2
On Mon, Dec 10, 2018 at 10:46:48PM +0200, Yuval Shaia wrote:
> On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote:
> > This change introduce the ib_device_ops structure that defines all the
> > InfiniBand device operations in one place, so the code will be more
> > readable and clean, unlike today that the ops are mixed with ib_device
> > data members.
> > 
> > The providers will need to define the supported operations and assign
> > them using ib_device_ops(), that will also make the providers code more
> 
> You mean ib_set_device_ops, right?

Correct, it should be ib_set_device_ops().

Jason, please let me know if I need to re-send this patch with a fixed
commit message.

> 
> > readable and clean.
> > 
> > Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> > ---
> >  drivers/infiniband/core/device.c |  98 +++++++++++++
> >  include/rdma/ib_verbs.h          | 245 +++++++++++++++++++++++++++++++
> >  2 files changed, 343 insertions(+)
> > 
> > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> > index 0027b0d79b09..3589894c46b8 100644
> > --- a/drivers/infiniband/core/device.c
> > +++ b/drivers/infiniband/core/device.c
> > @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
> >  }
> >  EXPORT_SYMBOL(ib_get_net_dev_by_params);
> >  
> > +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> > +{
> > +#define SET_DEVICE_OP(ptr, name)                                               \
> > +	do {                                                                   \
> > +		if (ops->name)                                                 \
> > +			if (!((ptr)->name))				       \
> > +				(ptr)->name = ops->name;                       \
> > +	} while (0)
> > +
> > +	SET_DEVICE_OP(dev, add_gid);
> > +	SET_DEVICE_OP(dev, alloc_dm);
> > +	SET_DEVICE_OP(dev, alloc_fmr);
> > +	SET_DEVICE_OP(dev, alloc_hw_stats);
> > +	SET_DEVICE_OP(dev, alloc_mr);
> > +	SET_DEVICE_OP(dev, alloc_mw);
> > +	SET_DEVICE_OP(dev, alloc_pd);
> > +	SET_DEVICE_OP(dev, alloc_rdma_netdev);
> > +	SET_DEVICE_OP(dev, alloc_ucontext);
> > +	SET_DEVICE_OP(dev, alloc_xrcd);
> > +	SET_DEVICE_OP(dev, attach_mcast);
> > +	SET_DEVICE_OP(dev, check_mr_status);
> > +	SET_DEVICE_OP(dev, create_ah);
> > +	SET_DEVICE_OP(dev, create_counters);
> > +	SET_DEVICE_OP(dev, create_cq);
> > +	SET_DEVICE_OP(dev, create_flow);
> > +	SET_DEVICE_OP(dev, create_flow_action_esp);
> > +	SET_DEVICE_OP(dev, create_qp);
> > +	SET_DEVICE_OP(dev, create_rwq_ind_table);
> > +	SET_DEVICE_OP(dev, create_srq);
> > +	SET_DEVICE_OP(dev, create_wq);
> > +	SET_DEVICE_OP(dev, dealloc_dm);
> > +	SET_DEVICE_OP(dev, dealloc_fmr);
> > +	SET_DEVICE_OP(dev, dealloc_mw);
> > +	SET_DEVICE_OP(dev, dealloc_pd);
> > +	SET_DEVICE_OP(dev, dealloc_ucontext);
> > +	SET_DEVICE_OP(dev, dealloc_xrcd);
> > +	SET_DEVICE_OP(dev, del_gid);
> > +	SET_DEVICE_OP(dev, dereg_mr);
> > +	SET_DEVICE_OP(dev, destroy_ah);
> > +	SET_DEVICE_OP(dev, destroy_counters);
> > +	SET_DEVICE_OP(dev, destroy_cq);
> > +	SET_DEVICE_OP(dev, destroy_flow);
> > +	SET_DEVICE_OP(dev, destroy_flow_action);
> > +	SET_DEVICE_OP(dev, destroy_qp);
> > +	SET_DEVICE_OP(dev, destroy_rwq_ind_table);
> > +	SET_DEVICE_OP(dev, destroy_srq);
> > +	SET_DEVICE_OP(dev, destroy_wq);
> > +	SET_DEVICE_OP(dev, detach_mcast);
> > +	SET_DEVICE_OP(dev, disassociate_ucontext);
> > +	SET_DEVICE_OP(dev, drain_rq);
> > +	SET_DEVICE_OP(dev, drain_sq);
> > +	SET_DEVICE_OP(dev, get_dev_fw_str);
> > +	SET_DEVICE_OP(dev, get_dma_mr);
> > +	SET_DEVICE_OP(dev, get_hw_stats);
> > +	SET_DEVICE_OP(dev, get_link_layer);
> > +	SET_DEVICE_OP(dev, get_netdev);
> > +	SET_DEVICE_OP(dev, get_port_immutable);
> > +	SET_DEVICE_OP(dev, get_vector_affinity);
> > +	SET_DEVICE_OP(dev, get_vf_config);
> > +	SET_DEVICE_OP(dev, get_vf_stats);
> > +	SET_DEVICE_OP(dev, map_mr_sg);
> > +	SET_DEVICE_OP(dev, map_phys_fmr);
> > +	SET_DEVICE_OP(dev, mmap);
> > +	SET_DEVICE_OP(dev, modify_ah);
> > +	SET_DEVICE_OP(dev, modify_cq);
> > +	SET_DEVICE_OP(dev, modify_device);
> > +	SET_DEVICE_OP(dev, modify_flow_action_esp);
> > +	SET_DEVICE_OP(dev, modify_port);
> > +	SET_DEVICE_OP(dev, modify_qp);
> > +	SET_DEVICE_OP(dev, modify_srq);
> > +	SET_DEVICE_OP(dev, modify_wq);
> > +	SET_DEVICE_OP(dev, peek_cq);
> > +	SET_DEVICE_OP(dev, poll_cq);
> > +	SET_DEVICE_OP(dev, post_recv);
> > +	SET_DEVICE_OP(dev, post_send);
> > +	SET_DEVICE_OP(dev, post_srq_recv);
> > +	SET_DEVICE_OP(dev, process_mad);
> > +	SET_DEVICE_OP(dev, query_ah);
> > +	SET_DEVICE_OP(dev, query_device);
> > +	SET_DEVICE_OP(dev, query_gid);
> > +	SET_DEVICE_OP(dev, query_pkey);
> > +	SET_DEVICE_OP(dev, query_port);
> > +	SET_DEVICE_OP(dev, query_qp);
> > +	SET_DEVICE_OP(dev, query_srq);
> > +	SET_DEVICE_OP(dev, rdma_netdev_get_params);
> > +	SET_DEVICE_OP(dev, read_counters);
> > +	SET_DEVICE_OP(dev, reg_dm_mr);
> > +	SET_DEVICE_OP(dev, reg_user_mr);
> > +	SET_DEVICE_OP(dev, req_ncomp_notif);
> > +	SET_DEVICE_OP(dev, req_notify_cq);
> > +	SET_DEVICE_OP(dev, rereg_user_mr);
> > +	SET_DEVICE_OP(dev, resize_cq);
> > +	SET_DEVICE_OP(dev, set_vf_guid);
> > +	SET_DEVICE_OP(dev, set_vf_link_state);
> > +	SET_DEVICE_OP(dev, unmap_fmr);
> > +}
> > +EXPORT_SYMBOL(ib_set_device_ops);
> > +
> >  static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
> >  	[RDMA_NL_LS_OP_RESOLVE] = {
> >  		.doit = ib_nl_handle_resolve_resp,
> > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > index 85021451eee0..3a02c72e7620 100644
> > --- a/include/rdma/ib_verbs.h
> > +++ b/include/rdma/ib_verbs.h
> > @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr {
> >  
> >  struct uverbs_attr_bundle;
> >  
> > +/**
> > + * struct ib_device_ops - InfiniBand device operations
> > + * This structure defines all the InfiniBand device operations, providers will
> > + * need to define the supported operations, otherwise they will be set to null.
> > + */
> > +struct ib_device_ops {
> > +	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
> > +			 const struct ib_send_wr **bad_send_wr);
> > +	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> > +			 const struct ib_recv_wr **bad_recv_wr);
> > +	void (*drain_rq)(struct ib_qp *qp);
> > +	void (*drain_sq)(struct ib_qp *qp);
> > +	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
> > +	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
> > +	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
> > +	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
> > +	int (*post_srq_recv)(struct ib_srq *srq,
> > +			     const struct ib_recv_wr *recv_wr,
> > +			     const struct ib_recv_wr **bad_recv_wr);
> > +	int (*process_mad)(struct ib_device *device, int process_mad_flags,
> > +			   u8 port_num, const struct ib_wc *in_wc,
> > +			   const struct ib_grh *in_grh,
> > +			   const struct ib_mad_hdr *in_mad, size_t in_mad_size,
> > +			   struct ib_mad_hdr *out_mad, size_t *out_mad_size,
> > +			   u16 *out_mad_pkey_index);
> > +	int (*query_device)(struct ib_device *device,
> > +			    struct ib_device_attr *device_attr,
> > +			    struct ib_udata *udata);
> > +	int (*modify_device)(struct ib_device *device, int device_modify_mask,
> > +			     struct ib_device_modify *device_modify);
> > +	void (*get_dev_fw_str)(struct ib_device *, char *str);
> > +	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
> > +						     int comp_vector);
> > +	int (*query_port)(struct ib_device *device, u8 port_num,
> > +			  struct ib_port_attr *port_attr);
> > +	int (*modify_port)(struct ib_device *device, u8 port_num,
> > +			   int port_modify_mask,
> > +			   struct ib_port_modify *port_modify);
> > +	/**
> > +	 * The following mandatory functions are used only at device
> > +	 * registration.  Keep functions such as these at the end of this
> > +	 * structure to avoid cache line misses when accessing struct ib_device
> > +	 * in fast paths.
> > +	 */
> > +	int (*get_port_immutable)(struct ib_device *, u8,
> > +				  struct ib_port_immutable *);
> > +	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
> > +					       u8 port_num);
> > +	/**
> > +	 * When calling get_netdev, the HW vendor's driver should return the
> > +	 * net device of device @device at port @port_num or NULL if such
> > +	 * a net device doesn't exist. The vendor driver should call dev_hold
> > +	 * on this net device. The HW vendor's device driver must guarantee
> > +	 * that this function returns NULL before the net device has finished
> > +	 * NETDEV_UNREGISTER state.
> > +	 */
> > +	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
> > +	/**
> > +	 * rdma netdev operation
> > +	 *
> > +	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
> > +	 * must return -EOPNOTSUPP if it doesn't support the specified type.
> > +	 */
> > +	struct net_device *(*alloc_rdma_netdev)(
> > +					struct ib_device *device,
> > +					u8 port_num,
> > +					enum rdma_netdev_t type,
> > +					const char *name,
> > +					unsigned char name_assign_type,
> > +					void (*setup)(struct net_device *));
> > +
> > +	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
> > +				      enum rdma_netdev_t type,
> > +				      struct rdma_netdev_alloc_params *params);
> > +	/**
> > +	 * query_gid should be return GID value for @device, when @port_num
> > +	 * link layer is either IB or iWarp. It is no-op if @port_num port
> > +	 * is RoCE link layer.
> > +	 */
> > +	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
> > +			 union ib_gid *gid);
> > +	/**
> > +	 * When calling add_gid, the HW vendor's driver should add the gid
> > +	 * of device of port at gid index available at @attr. Meta-info of
> > +	 * that gid (for example, the network device related to this gid) is
> > +	 * available at @attr. @context allows the HW vendor driver to store
> > +	 * extra information together with a GID entry. The HW vendor driver may
> > +	 * allocate memory to contain this information and store it in @context
> > +	 * when a new GID entry is written to. Params are consistent until the
> > +	 * next call of add_gid or delete_gid. The function should return 0 on
> > +	 * success or error otherwise. The function could be called
> > +	 * concurrently for different ports. This function is only called when
> > +	 * roce_gid_table is used.
> > +	 */
> > +	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
> > +	/**
> > +	 * When calling del_gid, the HW vendor's driver should delete the
> > +	 * gid of device @device at gid index gid_index of port port_num
> > +	 * available in @attr.
> > +	 * Upon the deletion of a GID entry, the HW vendor must free any
> > +	 * allocated memory. The caller will clear @context afterwards.
> > +	 * This function is only called when roce_gid_table is used.
> > +	 */
> > +	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
> > +	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
> > +			  u16 *pkey);
> > +	struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
> > +					      struct ib_udata *udata);
> > +	int (*dealloc_ucontext)(struct ib_ucontext *context);
> > +	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> > +	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> > +	struct ib_pd *(*alloc_pd)(struct ib_device *device,
> > +				  struct ib_ucontext *context,
> > +				  struct ib_udata *udata);
> > +	int (*dealloc_pd)(struct ib_pd *pd);
> > +	struct ib_ah *(*create_ah)(struct ib_pd *pd,
> > +				   struct rdma_ah_attr *ah_attr,
> > +				   struct ib_udata *udata);
> > +	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> > +	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> > +	int (*destroy_ah)(struct ib_ah *ah);
> > +	struct ib_srq *(*create_srq)(struct ib_pd *pd,
> > +				     struct ib_srq_init_attr *srq_init_attr,
> > +				     struct ib_udata *udata);
> > +	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
> > +			  enum ib_srq_attr_mask srq_attr_mask,
> > +			  struct ib_udata *udata);
> > +	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
> > +	int (*destroy_srq)(struct ib_srq *srq);
> > +	struct ib_qp *(*create_qp)(struct ib_pd *pd,
> > +				   struct ib_qp_init_attr *qp_init_attr,
> > +				   struct ib_udata *udata);
> > +	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> > +			 int qp_attr_mask, struct ib_udata *udata);
> > +	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> > +			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
> > +	int (*destroy_qp)(struct ib_qp *qp);
> > +	struct ib_cq *(*create_cq)(struct ib_device *device,
> > +				   const struct ib_cq_init_attr *attr,
> > +				   struct ib_ucontext *context,
> > +				   struct ib_udata *udata);
> > +	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
> > +	int (*destroy_cq)(struct ib_cq *cq);
> > +	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
> > +	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
> > +	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
> > +				     u64 virt_addr, int mr_access_flags,
> > +				     struct ib_udata *udata);
> > +	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
> > +			     u64 virt_addr, int mr_access_flags,
> > +			     struct ib_pd *pd, struct ib_udata *udata);
> > +	int (*dereg_mr)(struct ib_mr *mr);
> > +	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
> > +				  u32 max_num_sg);
> > +	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
> > +			 unsigned int *sg_offset);
> > +	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
> > +			       struct ib_mr_status *mr_status);
> > +	struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
> > +				  struct ib_udata *udata);
> > +	int (*dealloc_mw)(struct ib_mw *mw);
> > +	struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
> > +				    struct ib_fmr_attr *fmr_attr);
> > +	int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
> > +			    u64 iova);
> > +	int (*unmap_fmr)(struct list_head *fmr_list);
> > +	int (*dealloc_fmr)(struct ib_fmr *fmr);
> > +	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> > +	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> > +	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
> > +				      struct ib_ucontext *ucontext,
> > +				      struct ib_udata *udata);
> > +	int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
> > +	struct ib_flow *(*create_flow)(struct ib_qp *qp,
> > +				       struct ib_flow_attr *flow_attr,
> > +				       int domain, struct ib_udata *udata);
> > +	int (*destroy_flow)(struct ib_flow *flow_id);
> > +	struct ib_flow_action *(*create_flow_action_esp)(
> > +		struct ib_device *device,
> > +		const struct ib_flow_action_attrs_esp *attr,
> > +		struct uverbs_attr_bundle *attrs);
> > +	int (*destroy_flow_action)(struct ib_flow_action *action);
> > +	int (*modify_flow_action_esp)(
> > +		struct ib_flow_action *action,
> > +		const struct ib_flow_action_attrs_esp *attr,
> > +		struct uverbs_attr_bundle *attrs);
> > +	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
> > +				 int state);
> > +	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
> > +			     struct ifla_vf_info *ivf);
> > +	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
> > +			    struct ifla_vf_stats *stats);
> > +	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
> > +			   int type);
> > +	struct ib_wq *(*create_wq)(struct ib_pd *pd,
> > +				   struct ib_wq_init_attr *init_attr,
> > +				   struct ib_udata *udata);
> > +	int (*destroy_wq)(struct ib_wq *wq);
> > +	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
> > +			 u32 wq_attr_mask, struct ib_udata *udata);
> > +	struct ib_rwq_ind_table *(*create_rwq_ind_table)(
> > +		struct ib_device *device,
> > +		struct ib_rwq_ind_table_init_attr *init_attr,
> > +		struct ib_udata *udata);
> > +	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
> > +	struct ib_dm *(*alloc_dm)(struct ib_device *device,
> > +				  struct ib_ucontext *context,
> > +				  struct ib_dm_alloc_attr *attr,
> > +				  struct uverbs_attr_bundle *attrs);
> > +	int (*dealloc_dm)(struct ib_dm *dm);
> > +	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
> > +				   struct ib_dm_mr_attr *attr,
> > +				   struct uverbs_attr_bundle *attrs);
> > +	struct ib_counters *(*create_counters)(
> > +		struct ib_device *device, struct uverbs_attr_bundle *attrs);
> > +	int (*destroy_counters)(struct ib_counters *counters);
> > +	int (*read_counters)(struct ib_counters *counters,
> > +			     struct ib_counters_read_attr *counters_read_attr,
> > +			     struct uverbs_attr_bundle *attrs);
> > +	/**
> > +	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
> > +	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
> > +	 *   core when the device is removed.  A lifespan of -1 in the return
> > +	 *   struct tells the core to set a default lifespan.
> > +	 */
> > +	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
> > +						u8 port_num);
> > +	/**
> > +	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
> > +	 * @index - The index in the value array we wish to have updated, or
> > +	 *   num_counters if we want all stats updated
> > +	 * Return codes -
> > +	 *   < 0 - Error, no counters updated
> > +	 *   index - Updated the single counter pointed to by index
> > +	 *   num_counters - Updated all counters (will reset the timestamp
> > +	 *     and prevent further calls for lifespan milliseconds)
> > +	 * Drivers are allowed to update all counters in leiu of just the
> > +	 *   one given in index at their option
> > +	 */
> > +	int (*get_hw_stats)(struct ib_device *device,
> > +			    struct rdma_hw_stats *stats, u8 port, int index);
> > +};
> > +
> 
> So now we have each function declared both in ib_device and in
> ib_device_ops?

No, the last patch in the patch set removes the functions within the ib_device.

> 
> Will it be easier to remove all declarations from ib_device and add new
> member struct ib_device_ops *ops to ib_device this way objective will be
> achieved (code more readable) while still preserving single place of
> declaration?

Please take a look at the last patch in this patch set; it removes the functions
within ib_device and adds ib_device_ops as a member of ib_device.
This was done to make it easier to review this huge patch set.

> 
> >  struct ib_device {
> >  	/* Do not access @dma_device directly from ULP nor from HW drivers. */
> >  	struct device                *dma_device;
> > @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client);
> >  void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
> >  void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
> >  			 void *data);
> > +void ib_set_device_ops(struct ib_device *device,
> > +		       const struct ib_device_ops *ops);
> >  
> >  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
> >  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> > -- 
> > 2.19.2
> >
Yuval Shaia Dec. 11, 2018, 9:37 a.m. UTC | #3
On Mon, Dec 10, 2018 at 11:14:17PM +0200, Kamal Heib wrote:
> On Mon, Dec 10, 2018 at 10:46:48PM +0200, Yuval Shaia wrote:
> > On Mon, Dec 10, 2018 at 09:09:30PM +0200, Kamal Heib wrote:
> > > This change introduce the ib_device_ops structure that defines all the
> > > InfiniBand device operations in one place, so the code will be more
> > > readable and clean, unlike today that the ops are mixed with ib_device
> > > data members.
> > > 
> > > The providers will need to define the supported operations and assign
> > > them using ib_device_ops(), that will also make the providers code more
> > 
> > You mean ib_set_device_ops, right?
> 
> Correct, it should be ib_set_device_ops().
> 
> Jason, Please let me know if I need to re-send this patch with a fixes
> commit message?
> 
> > 
> > > readable and clean.
> > > 
> > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> > > ---
> > >  drivers/infiniband/core/device.c |  98 +++++++++++++
> > >  include/rdma/ib_verbs.h          | 245 +++++++++++++++++++++++++++++++
> > >  2 files changed, 343 insertions(+)
> > > 
> > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> > > index 0027b0d79b09..3589894c46b8 100644
> > > --- a/drivers/infiniband/core/device.c
> > > +++ b/drivers/infiniband/core/device.c
> > > @@ -1216,6 +1216,104 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
> > >  }
> > >  EXPORT_SYMBOL(ib_get_net_dev_by_params);
> > >  
> > > +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> > > +{
> > > +#define SET_DEVICE_OP(ptr, name)                                               \
> > > +	do {                                                                   \
> > > +		if (ops->name)                                                 \
> > > +			if (!((ptr)->name))				       \
> > > +				(ptr)->name = ops->name;                       \
> > > +	} while (0)
> > > +
> > > +	SET_DEVICE_OP(dev, add_gid);
> > > +	SET_DEVICE_OP(dev, alloc_dm);
> > > +	SET_DEVICE_OP(dev, alloc_fmr);
> > > +	SET_DEVICE_OP(dev, alloc_hw_stats);
> > > +	SET_DEVICE_OP(dev, alloc_mr);
> > > +	SET_DEVICE_OP(dev, alloc_mw);
> > > +	SET_DEVICE_OP(dev, alloc_pd);
> > > +	SET_DEVICE_OP(dev, alloc_rdma_netdev);
> > > +	SET_DEVICE_OP(dev, alloc_ucontext);
> > > +	SET_DEVICE_OP(dev, alloc_xrcd);
> > > +	SET_DEVICE_OP(dev, attach_mcast);
> > > +	SET_DEVICE_OP(dev, check_mr_status);
> > > +	SET_DEVICE_OP(dev, create_ah);
> > > +	SET_DEVICE_OP(dev, create_counters);
> > > +	SET_DEVICE_OP(dev, create_cq);
> > > +	SET_DEVICE_OP(dev, create_flow);
> > > +	SET_DEVICE_OP(dev, create_flow_action_esp);
> > > +	SET_DEVICE_OP(dev, create_qp);
> > > +	SET_DEVICE_OP(dev, create_rwq_ind_table);
> > > +	SET_DEVICE_OP(dev, create_srq);
> > > +	SET_DEVICE_OP(dev, create_wq);
> > > +	SET_DEVICE_OP(dev, dealloc_dm);
> > > +	SET_DEVICE_OP(dev, dealloc_fmr);
> > > +	SET_DEVICE_OP(dev, dealloc_mw);
> > > +	SET_DEVICE_OP(dev, dealloc_pd);
> > > +	SET_DEVICE_OP(dev, dealloc_ucontext);
> > > +	SET_DEVICE_OP(dev, dealloc_xrcd);
> > > +	SET_DEVICE_OP(dev, del_gid);
> > > +	SET_DEVICE_OP(dev, dereg_mr);
> > > +	SET_DEVICE_OP(dev, destroy_ah);
> > > +	SET_DEVICE_OP(dev, destroy_counters);
> > > +	SET_DEVICE_OP(dev, destroy_cq);
> > > +	SET_DEVICE_OP(dev, destroy_flow);
> > > +	SET_DEVICE_OP(dev, destroy_flow_action);
> > > +	SET_DEVICE_OP(dev, destroy_qp);
> > > +	SET_DEVICE_OP(dev, destroy_rwq_ind_table);
> > > +	SET_DEVICE_OP(dev, destroy_srq);
> > > +	SET_DEVICE_OP(dev, destroy_wq);
> > > +	SET_DEVICE_OP(dev, detach_mcast);
> > > +	SET_DEVICE_OP(dev, disassociate_ucontext);
> > > +	SET_DEVICE_OP(dev, drain_rq);
> > > +	SET_DEVICE_OP(dev, drain_sq);
> > > +	SET_DEVICE_OP(dev, get_dev_fw_str);
> > > +	SET_DEVICE_OP(dev, get_dma_mr);
> > > +	SET_DEVICE_OP(dev, get_hw_stats);
> > > +	SET_DEVICE_OP(dev, get_link_layer);
> > > +	SET_DEVICE_OP(dev, get_netdev);
> > > +	SET_DEVICE_OP(dev, get_port_immutable);
> > > +	SET_DEVICE_OP(dev, get_vector_affinity);
> > > +	SET_DEVICE_OP(dev, get_vf_config);
> > > +	SET_DEVICE_OP(dev, get_vf_stats);
> > > +	SET_DEVICE_OP(dev, map_mr_sg);
> > > +	SET_DEVICE_OP(dev, map_phys_fmr);
> > > +	SET_DEVICE_OP(dev, mmap);
> > > +	SET_DEVICE_OP(dev, modify_ah);
> > > +	SET_DEVICE_OP(dev, modify_cq);
> > > +	SET_DEVICE_OP(dev, modify_device);
> > > +	SET_DEVICE_OP(dev, modify_flow_action_esp);
> > > +	SET_DEVICE_OP(dev, modify_port);
> > > +	SET_DEVICE_OP(dev, modify_qp);
> > > +	SET_DEVICE_OP(dev, modify_srq);
> > > +	SET_DEVICE_OP(dev, modify_wq);
> > > +	SET_DEVICE_OP(dev, peek_cq);
> > > +	SET_DEVICE_OP(dev, poll_cq);
> > > +	SET_DEVICE_OP(dev, post_recv);
> > > +	SET_DEVICE_OP(dev, post_send);
> > > +	SET_DEVICE_OP(dev, post_srq_recv);
> > > +	SET_DEVICE_OP(dev, process_mad);
> > > +	SET_DEVICE_OP(dev, query_ah);
> > > +	SET_DEVICE_OP(dev, query_device);
> > > +	SET_DEVICE_OP(dev, query_gid);
> > > +	SET_DEVICE_OP(dev, query_pkey);
> > > +	SET_DEVICE_OP(dev, query_port);
> > > +	SET_DEVICE_OP(dev, query_qp);
> > > +	SET_DEVICE_OP(dev, query_srq);
> > > +	SET_DEVICE_OP(dev, rdma_netdev_get_params);
> > > +	SET_DEVICE_OP(dev, read_counters);
> > > +	SET_DEVICE_OP(dev, reg_dm_mr);
> > > +	SET_DEVICE_OP(dev, reg_user_mr);
> > > +	SET_DEVICE_OP(dev, req_ncomp_notif);
> > > +	SET_DEVICE_OP(dev, req_notify_cq);
> > > +	SET_DEVICE_OP(dev, rereg_user_mr);
> > > +	SET_DEVICE_OP(dev, resize_cq);
> > > +	SET_DEVICE_OP(dev, set_vf_guid);
> > > +	SET_DEVICE_OP(dev, set_vf_link_state);
> > > +	SET_DEVICE_OP(dev, unmap_fmr);
> > > +}
> > > +EXPORT_SYMBOL(ib_set_device_ops);
> > > +
> > >  static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
> > >  	[RDMA_NL_LS_OP_RESOLVE] = {
> > >  		.doit = ib_nl_handle_resolve_resp,
> > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > > index 85021451eee0..3a02c72e7620 100644
> > > --- a/include/rdma/ib_verbs.h
> > > +++ b/include/rdma/ib_verbs.h
> > > @@ -2257,6 +2257,249 @@ struct ib_counters_read_attr {
> > >  
> > >  struct uverbs_attr_bundle;
> > >  
> > > +/**
> > > + * struct ib_device_ops - InfiniBand device operations
> > > + * This structure defines all the InfiniBand device operations, providers will
> > > + * need to define the supported operations, otherwise they will be set to null.
> > > + */
> > > +struct ib_device_ops {
> > > +	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
> > > +			 const struct ib_send_wr **bad_send_wr);
> > > +	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> > > +			 const struct ib_recv_wr **bad_recv_wr);
> > > +	void (*drain_rq)(struct ib_qp *qp);
> > > +	void (*drain_sq)(struct ib_qp *qp);
> > > +	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
> > > +	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
> > > +	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
> > > +	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
> > > +	int (*post_srq_recv)(struct ib_srq *srq,
> > > +			     const struct ib_recv_wr *recv_wr,
> > > +			     const struct ib_recv_wr **bad_recv_wr);
> > > +	int (*process_mad)(struct ib_device *device, int process_mad_flags,
> > > +			   u8 port_num, const struct ib_wc *in_wc,
> > > +			   const struct ib_grh *in_grh,
> > > +			   const struct ib_mad_hdr *in_mad, size_t in_mad_size,
> > > +			   struct ib_mad_hdr *out_mad, size_t *out_mad_size,
> > > +			   u16 *out_mad_pkey_index);
> > > +	int (*query_device)(struct ib_device *device,
> > > +			    struct ib_device_attr *device_attr,
> > > +			    struct ib_udata *udata);
> > > +	int (*modify_device)(struct ib_device *device, int device_modify_mask,
> > > +			     struct ib_device_modify *device_modify);
> > > +	void (*get_dev_fw_str)(struct ib_device *, char *str);
> > > +	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
> > > +						     int comp_vector);
> > > +	int (*query_port)(struct ib_device *device, u8 port_num,
> > > +			  struct ib_port_attr *port_attr);
> > > +	int (*modify_port)(struct ib_device *device, u8 port_num,
> > > +			   int port_modify_mask,
> > > +			   struct ib_port_modify *port_modify);
> > > +	/**
> > > +	 * The following mandatory functions are used only at device
> > > +	 * registration.  Keep functions such as these at the end of this
> > > +	 * structure to avoid cache line misses when accessing struct ib_device
> > > +	 * in fast paths.
> > > +	 */
> > > +	int (*get_port_immutable)(struct ib_device *, u8,
> > > +				  struct ib_port_immutable *);
> > > +	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
> > > +					       u8 port_num);
> > > +	/**
> > > +	 * When calling get_netdev, the HW vendor's driver should return the
> > > +	 * net device of device @device at port @port_num or NULL if such
> > > +	 * a net device doesn't exist. The vendor driver should call dev_hold
> > > +	 * on this net device. The HW vendor's device driver must guarantee
> > > +	 * that this function returns NULL before the net device has finished
> > > +	 * NETDEV_UNREGISTER state.
> > > +	 */
> > > +	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
> > > +	/**
> > > +	 * rdma netdev operation
> > > +	 *
> > > +	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
> > > +	 * must return -EOPNOTSUPP if it doesn't support the specified type.
> > > +	 */
> > > +	struct net_device *(*alloc_rdma_netdev)(
> > > +					struct ib_device *device,
> > > +					u8 port_num,
> > > +					enum rdma_netdev_t type,
> > > +					const char *name,
> > > +					unsigned char name_assign_type,
> > > +					void (*setup)(struct net_device *));
> > > +
> > > +	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
> > > +				      enum rdma_netdev_t type,
> > > +				      struct rdma_netdev_alloc_params *params);
> > > +	/**
> > > +	 * query_gid should be return GID value for @device, when @port_num
> > > +	 * link layer is either IB or iWarp. It is no-op if @port_num port
> > > +	 * is RoCE link layer.
> > > +	 */
> > > +	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
> > > +			 union ib_gid *gid);
> > > +	/**
> > > +	 * When calling add_gid, the HW vendor's driver should add the gid
> > > +	 * of device of port at gid index available at @attr. Meta-info of
> > > +	 * that gid (for example, the network device related to this gid) is
> > > +	 * available at @attr. @context allows the HW vendor driver to store
> > > +	 * extra information together with a GID entry. The HW vendor driver may
> > > +	 * allocate memory to contain this information and store it in @context
> > > +	 * when a new GID entry is written to. Params are consistent until the
> > > +	 * next call of add_gid or delete_gid. The function should return 0 on
> > > +	 * success or error otherwise. The function could be called
> > > +	 * concurrently for different ports. This function is only called when
> > > +	 * roce_gid_table is used.
> > > +	 */
> > > +	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
> > > +	/**
> > > +	 * When calling del_gid, the HW vendor's driver should delete the
> > > +	 * gid of device @device at gid index gid_index of port port_num
> > > +	 * available in @attr.
> > > +	 * Upon the deletion of a GID entry, the HW vendor must free any
> > > +	 * allocated memory. The caller will clear @context afterwards.
> > > +	 * This function is only called when roce_gid_table is used.
> > > +	 */
> > > +	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
> > > +	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
> > > +			  u16 *pkey);
> > > +	struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
> > > +					      struct ib_udata *udata);
> > > +	int (*dealloc_ucontext)(struct ib_ucontext *context);
> > > +	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> > > +	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> > > +	struct ib_pd *(*alloc_pd)(struct ib_device *device,
> > > +				  struct ib_ucontext *context,
> > > +				  struct ib_udata *udata);
> > > +	int (*dealloc_pd)(struct ib_pd *pd);
> > > +	struct ib_ah *(*create_ah)(struct ib_pd *pd,
> > > +				   struct rdma_ah_attr *ah_attr,
> > > +				   struct ib_udata *udata);
> > > +	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> > > +	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
> > > +	int (*destroy_ah)(struct ib_ah *ah);
> > > +	struct ib_srq *(*create_srq)(struct ib_pd *pd,
> > > +				     struct ib_srq_init_attr *srq_init_attr,
> > > +				     struct ib_udata *udata);
> > > +	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
> > > +			  enum ib_srq_attr_mask srq_attr_mask,
> > > +			  struct ib_udata *udata);
> > > +	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
> > > +	int (*destroy_srq)(struct ib_srq *srq);
> > > +	struct ib_qp *(*create_qp)(struct ib_pd *pd,
> > > +				   struct ib_qp_init_attr *qp_init_attr,
> > > +				   struct ib_udata *udata);
> > > +	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> > > +			 int qp_attr_mask, struct ib_udata *udata);
> > > +	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
> > > +			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
> > > +	int (*destroy_qp)(struct ib_qp *qp);
> > > +	struct ib_cq *(*create_cq)(struct ib_device *device,
> > > +				   const struct ib_cq_init_attr *attr,
> > > +				   struct ib_ucontext *context,
> > > +				   struct ib_udata *udata);
> > > +	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
> > > +	int (*destroy_cq)(struct ib_cq *cq);
> > > +	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
> > > +	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
> > > +	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
> > > +				     u64 virt_addr, int mr_access_flags,
> > > +				     struct ib_udata *udata);
> > > +	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
> > > +			     u64 virt_addr, int mr_access_flags,
> > > +			     struct ib_pd *pd, struct ib_udata *udata);
> > > +	int (*dereg_mr)(struct ib_mr *mr);
> > > +	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
> > > +				  u32 max_num_sg);
> > > +	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
> > > +			 unsigned int *sg_offset);
> > > +	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
> > > +			       struct ib_mr_status *mr_status);
> > > +	struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
> > > +				  struct ib_udata *udata);
> > > +	int (*dealloc_mw)(struct ib_mw *mw);
> > > +	struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
> > > +				    struct ib_fmr_attr *fmr_attr);
> > > +	int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
> > > +			    u64 iova);
> > > +	int (*unmap_fmr)(struct list_head *fmr_list);
> > > +	int (*dealloc_fmr)(struct ib_fmr *fmr);
> > > +	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> > > +	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
> > > +	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
> > > +				      struct ib_ucontext *ucontext,
> > > +				      struct ib_udata *udata);
> > > +	int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
> > > +	struct ib_flow *(*create_flow)(struct ib_qp *qp,
> > > +				       struct ib_flow_attr *flow_attr,
> > > +				       int domain, struct ib_udata *udata);
> > > +	int (*destroy_flow)(struct ib_flow *flow_id);
> > > +	struct ib_flow_action *(*create_flow_action_esp)(
> > > +		struct ib_device *device,
> > > +		const struct ib_flow_action_attrs_esp *attr,
> > > +		struct uverbs_attr_bundle *attrs);
> > > +	int (*destroy_flow_action)(struct ib_flow_action *action);
> > > +	int (*modify_flow_action_esp)(
> > > +		struct ib_flow_action *action,
> > > +		const struct ib_flow_action_attrs_esp *attr,
> > > +		struct uverbs_attr_bundle *attrs);
> > > +	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
> > > +				 int state);
> > > +	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
> > > +			     struct ifla_vf_info *ivf);
> > > +	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
> > > +			    struct ifla_vf_stats *stats);
> > > +	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
> > > +			   int type);
> > > +	struct ib_wq *(*create_wq)(struct ib_pd *pd,
> > > +				   struct ib_wq_init_attr *init_attr,
> > > +				   struct ib_udata *udata);
> > > +	int (*destroy_wq)(struct ib_wq *wq);
> > > +	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
> > > +			 u32 wq_attr_mask, struct ib_udata *udata);
> > > +	struct ib_rwq_ind_table *(*create_rwq_ind_table)(
> > > +		struct ib_device *device,
> > > +		struct ib_rwq_ind_table_init_attr *init_attr,
> > > +		struct ib_udata *udata);
> > > +	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
> > > +	struct ib_dm *(*alloc_dm)(struct ib_device *device,
> > > +				  struct ib_ucontext *context,
> > > +				  struct ib_dm_alloc_attr *attr,
> > > +				  struct uverbs_attr_bundle *attrs);
> > > +	int (*dealloc_dm)(struct ib_dm *dm);
> > > +	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
> > > +				   struct ib_dm_mr_attr *attr,
> > > +				   struct uverbs_attr_bundle *attrs);
> > > +	struct ib_counters *(*create_counters)(
> > > +		struct ib_device *device, struct uverbs_attr_bundle *attrs);
> > > +	int (*destroy_counters)(struct ib_counters *counters);
> > > +	int (*read_counters)(struct ib_counters *counters,
> > > +			     struct ib_counters_read_attr *counters_read_attr,
> > > +			     struct uverbs_attr_bundle *attrs);
> > > +	/**
> > > +	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
> > > +	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
> > > +	 *   core when the device is removed.  A lifespan of -1 in the return
> > > +	 *   struct tells the core to set a default lifespan.
> > > +	 */
> > > +	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
> > > +						u8 port_num);
> > > +	/**
> > > +	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
> > > +	 * @index - The index in the value array we wish to have updated, or
> > > +	 *   num_counters if we want all stats updated
> > > +	 * Return codes -
> > > +	 *   < 0 - Error, no counters updated
> > > +	 *   index - Updated the single counter pointed to by index
> > > +	 *   num_counters - Updated all counters (will reset the timestamp
> > > +	 *     and prevent further calls for lifespan milliseconds)
> > > +	 * Drivers are allowed to update all counters in leiu of just the
> > > +	 *   one given in index at their option
> > > +	 */
> > > +	int (*get_hw_stats)(struct ib_device *device,
> > > +			    struct rdma_hw_stats *stats, u8 port, int index);
> > > +};
> > > +
> > 
> > So now we have each function declared both in ib_device and in
> > ib_device_ops?
> 
> No, the last patch in the patch set removes the functions within the ib_device.
> 
> > 
> > Will it be easier to remove all declarations from ib_device and add new
> > member struct ib_device_ops *ops to ib_device this way objective will be
> > achieved (code more readable) while still preserving single place of
> > declaration?
> 
> Please take a look on the last patch in this patch set, it removes the functions
> within the ib_device and add the ib_device_ops as a member to the ib_device,
> this was done to make it easier to review this huge patch set.

Ok, saw it, so I'm wondering why there are two separate patches, where the
second undoes stuff done by the first.
I guess it was easier to maintain this way; still, I'd suggest — in case it
is not too much trouble — merging them, as it might look funny in git-bisect
if it fails on this one :)

Anyway, is it somehow possible to include the setting of uverbs_cmd_mask?

> 
> > 
> > >  struct ib_device {
> > >  	/* Do not access @dma_device directly from ULP nor from HW drivers. */
> > >  	struct device                *dma_device;
> > > @@ -2660,6 +2903,8 @@ void ib_unregister_client(struct ib_client *client);
> > >  void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
> > >  void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
> > >  			 void *data);
> > > +void ib_set_device_ops(struct ib_device *device,
> > > +		       const struct ib_device_ops *ops);
> > >  
> > >  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
> > >  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> > > -- 
> > > 2.19.2
> > >
Jason Gunthorpe Dec. 11, 2018, 4:10 p.m. UTC | #4
On Tue, Dec 11, 2018 at 11:37:30AM +0200, Yuval Shaia wrote:
> > > Will it be easier to remove all declarations from ib_device and add new
> > > member struct ib_device_ops *ops to ib_device this way objective will be
> > > achieved (code more readable) while still preserving single place of
> > > declaration?
> > 
> > Please take a look on the last patch in this patch set, it removes the functions
> > within the ib_device and add the ib_device_ops as a member to the ib_device,
> > this was done to make it easier to review this huge patch set.
> 
> Ok, saw it, so wondering why two separate patches where second undo stuff
> made by first.

This arrangement has lower LOC changes

> Anyways, is it possible somehow to include the setting of uverbs_cmd_mask?

I want to delete this thing, I think we can now, just needs some
driver auditing (maybe Kamal will do this next :))

Jason
Kamal Heib Dec. 11, 2018, 6:06 p.m. UTC | #5
On Tue, Dec 11, 2018 at 09:10:46AM -0700, Jason Gunthorpe wrote:
> On Tue, Dec 11, 2018 at 11:37:30AM +0200, Yuval Shaia wrote:
> > > > Will it be easier to remove all declarations from ib_device and add new
> > > > member struct ib_device_ops *ops to ib_device this way objective will be
> > > > achieved (code more readable) while still preserving single place of
> > > > declaration?
> > > 
> > > Please take a look on the last patch in this patch set, it removes the functions
> > > within the ib_device and add the ib_device_ops as a member to the ib_device,
> > > this was done to make it easier to review this huge patch set.
> > 
> > Ok, saw it, so wondering why two separate patches where second undo stuff
> > made by first.
> 
> This arrangement has lower LOC changes
> 
> > Anyways, is it possible somehow to include the setting of uverbs_cmd_mask?
> 
> I want to delete this thing, I think we can now, just needs some
> driver auditing (maybe Kamal will do this next :))
>

Maybe :-)
 
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 0027b0d79b09..3589894c46b8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1216,6 +1216,104 @@  struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 }
 EXPORT_SYMBOL(ib_get_net_dev_by_params);
 
+void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
+{
+#define SET_DEVICE_OP(ptr, name)                                               \
+	do {                                                                   \
+		if (ops->name)                                                 \
+			if (!((ptr)->name))				       \
+				(ptr)->name = ops->name;                       \
+	} while (0)
+
+	SET_DEVICE_OP(dev, add_gid);
+	SET_DEVICE_OP(dev, alloc_dm);
+	SET_DEVICE_OP(dev, alloc_fmr);
+	SET_DEVICE_OP(dev, alloc_hw_stats);
+	SET_DEVICE_OP(dev, alloc_mr);
+	SET_DEVICE_OP(dev, alloc_mw);
+	SET_DEVICE_OP(dev, alloc_pd);
+	SET_DEVICE_OP(dev, alloc_rdma_netdev);
+	SET_DEVICE_OP(dev, alloc_ucontext);
+	SET_DEVICE_OP(dev, alloc_xrcd);
+	SET_DEVICE_OP(dev, attach_mcast);
+	SET_DEVICE_OP(dev, check_mr_status);
+	SET_DEVICE_OP(dev, create_ah);
+	SET_DEVICE_OP(dev, create_counters);
+	SET_DEVICE_OP(dev, create_cq);
+	SET_DEVICE_OP(dev, create_flow);
+	SET_DEVICE_OP(dev, create_flow_action_esp);
+	SET_DEVICE_OP(dev, create_qp);
+	SET_DEVICE_OP(dev, create_rwq_ind_table);
+	SET_DEVICE_OP(dev, create_srq);
+	SET_DEVICE_OP(dev, create_wq);
+	SET_DEVICE_OP(dev, dealloc_dm);
+	SET_DEVICE_OP(dev, dealloc_fmr);
+	SET_DEVICE_OP(dev, dealloc_mw);
+	SET_DEVICE_OP(dev, dealloc_pd);
+	SET_DEVICE_OP(dev, dealloc_ucontext);
+	SET_DEVICE_OP(dev, dealloc_xrcd);
+	SET_DEVICE_OP(dev, del_gid);
+	SET_DEVICE_OP(dev, dereg_mr);
+	SET_DEVICE_OP(dev, destroy_ah);
+	SET_DEVICE_OP(dev, destroy_counters);
+	SET_DEVICE_OP(dev, destroy_cq);
+	SET_DEVICE_OP(dev, destroy_flow);
+	SET_DEVICE_OP(dev, destroy_flow_action);
+	SET_DEVICE_OP(dev, destroy_qp);
+	SET_DEVICE_OP(dev, destroy_rwq_ind_table);
+	SET_DEVICE_OP(dev, destroy_srq);
+	SET_DEVICE_OP(dev, destroy_wq);
+	SET_DEVICE_OP(dev, detach_mcast);
+	SET_DEVICE_OP(dev, disassociate_ucontext);
+	SET_DEVICE_OP(dev, drain_rq);
+	SET_DEVICE_OP(dev, drain_sq);
+	SET_DEVICE_OP(dev, get_dev_fw_str);
+	SET_DEVICE_OP(dev, get_dma_mr);
+	SET_DEVICE_OP(dev, get_hw_stats);
+	SET_DEVICE_OP(dev, get_link_layer);
+	SET_DEVICE_OP(dev, get_netdev);
+	SET_DEVICE_OP(dev, get_port_immutable);
+	SET_DEVICE_OP(dev, get_vector_affinity);
+	SET_DEVICE_OP(dev, get_vf_config);
+	SET_DEVICE_OP(dev, get_vf_stats);
+	SET_DEVICE_OP(dev, map_mr_sg);
+	SET_DEVICE_OP(dev, map_phys_fmr);
+	SET_DEVICE_OP(dev, mmap);
+	SET_DEVICE_OP(dev, modify_ah);
+	SET_DEVICE_OP(dev, modify_cq);
+	SET_DEVICE_OP(dev, modify_device);
+	SET_DEVICE_OP(dev, modify_flow_action_esp);
+	SET_DEVICE_OP(dev, modify_port);
+	SET_DEVICE_OP(dev, modify_qp);
+	SET_DEVICE_OP(dev, modify_srq);
+	SET_DEVICE_OP(dev, modify_wq);
+	SET_DEVICE_OP(dev, peek_cq);
+	SET_DEVICE_OP(dev, poll_cq);
+	SET_DEVICE_OP(dev, post_recv);
+	SET_DEVICE_OP(dev, post_send);
+	SET_DEVICE_OP(dev, post_srq_recv);
+	SET_DEVICE_OP(dev, process_mad);
+	SET_DEVICE_OP(dev, query_ah);
+	SET_DEVICE_OP(dev, query_device);
+	SET_DEVICE_OP(dev, query_gid);
+	SET_DEVICE_OP(dev, query_pkey);
+	SET_DEVICE_OP(dev, query_port);
+	SET_DEVICE_OP(dev, query_qp);
+	SET_DEVICE_OP(dev, query_srq);
+	SET_DEVICE_OP(dev, rdma_netdev_get_params);
+	SET_DEVICE_OP(dev, read_counters);
+	SET_DEVICE_OP(dev, reg_dm_mr);
+	SET_DEVICE_OP(dev, reg_user_mr);
+	SET_DEVICE_OP(dev, req_ncomp_notif);
+	SET_DEVICE_OP(dev, req_notify_cq);
+	SET_DEVICE_OP(dev, rereg_user_mr);
+	SET_DEVICE_OP(dev, resize_cq);
+	SET_DEVICE_OP(dev, set_vf_guid);
+	SET_DEVICE_OP(dev, set_vf_link_state);
+	SET_DEVICE_OP(dev, unmap_fmr);
+}
+EXPORT_SYMBOL(ib_set_device_ops);
+
 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 	[RDMA_NL_LS_OP_RESOLVE] = {
 		.doit = ib_nl_handle_resolve_resp,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85021451eee0..3a02c72e7620 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2257,6 +2257,249 @@  struct ib_counters_read_attr {
 
 struct uverbs_attr_bundle;
 
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations, providers will
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+			 const struct ib_send_wr **bad_send_wr);
+	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+			 const struct ib_recv_wr **bad_recv_wr);
+	void (*drain_rq)(struct ib_qp *qp);
+	void (*drain_sq)(struct ib_qp *qp);
+	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+	int (*post_srq_recv)(struct ib_srq *srq,
+			     const struct ib_recv_wr *recv_wr,
+			     const struct ib_recv_wr **bad_recv_wr);
+	int (*process_mad)(struct ib_device *device, int process_mad_flags,
+			   u8 port_num, const struct ib_wc *in_wc,
+			   const struct ib_grh *in_grh,
+			   const struct ib_mad_hdr *in_mad, size_t in_mad_size,
+			   struct ib_mad_hdr *out_mad, size_t *out_mad_size,
+			   u16 *out_mad_pkey_index);
+	int (*query_device)(struct ib_device *device,
+			    struct ib_device_attr *device_attr,
+			    struct ib_udata *udata);
+	int (*modify_device)(struct ib_device *device, int device_modify_mask,
+			     struct ib_device_modify *device_modify);
+	void (*get_dev_fw_str)(struct ib_device *, char *str);
+	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+						     int comp_vector);
+	int (*query_port)(struct ib_device *device, u8 port_num,
+			  struct ib_port_attr *port_attr);
+	int (*modify_port)(struct ib_device *device, u8 port_num,
+			   int port_modify_mask,
+			   struct ib_port_modify *port_modify);
+	/**
+	 * The following mandatory functions are used only at device
+	 * registration.  Keep functions such as these at the end of this
+	 * structure to avoid cache line misses when accessing struct ib_device
+	 * in fast paths.
+	 */
+	int (*get_port_immutable)(struct ib_device *, u8,
+				  struct ib_port_immutable *);
+	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+					       u8 port_num);
+	/**
+	 * When calling get_netdev, the HW vendor's driver should return the
+	 * net device of device @device at port @port_num or NULL if such
+	 * a net device doesn't exist. The vendor driver should call dev_hold
+	 * on this net device. The HW vendor's device driver must guarantee
+	 * that this function returns NULL before the net device has finished
+	 * NETDEV_UNREGISTER state.
+	 */
+	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+	/**
+	 * rdma netdev operation
+	 *
+	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+	 * must return -EOPNOTSUPP if it doesn't support the specified type.
+	 */
+	struct net_device *(*alloc_rdma_netdev)(
+					struct ib_device *device,
+					u8 port_num,
+					enum rdma_netdev_t type,
+					const char *name,
+					unsigned char name_assign_type,
+					void (*setup)(struct net_device *));
+
+	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+				      enum rdma_netdev_t type,
+				      struct rdma_netdev_alloc_params *params);
+	/**
+	 * query_gid should return the GID value for @device, when the
+	 * @port_num link layer is either IB or iWARP. It is a no-op if the
+	 * @port_num port is a RoCE link layer.
+	 */
+	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+			 union ib_gid *gid);
+	/**
+	 * When calling add_gid, the HW vendor's driver should add the gid
+	 * of device of port at gid index available at @attr. Meta-info of
+	 * that gid (for example, the network device related to this gid) is
+	 * available at @attr. @context allows the HW vendor driver to store
+	 * extra information together with a GID entry. The HW vendor driver may
+	 * allocate memory to contain this information and store it in @context
+	 * when a new GID entry is written to. Params are consistent until the
+	 * next call of add_gid or delete_gid. The function should return 0 on
+	 * success or error otherwise. The function could be called
+	 * concurrently for different ports. This function is only called when
+	 * roce_gid_table is used.
+	 */
+	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+	/**
+	 * When calling del_gid, the HW vendor's driver should delete the
+	 * gid of device @device at gid index gid_index of port port_num
+	 * available in @attr.
+	 * Upon the deletion of a GID entry, the HW vendor must free any
+	 * allocated memory. The caller will clear @context afterwards.
+	 * This function is only called when roce_gid_table is used.
+	 */
+	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+			  u16 *pkey);
+	struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
+					      struct ib_udata *udata);
+	int (*dealloc_ucontext)(struct ib_ucontext *context);
+	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+	struct ib_pd *(*alloc_pd)(struct ib_device *device,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata);
+	int (*dealloc_pd)(struct ib_pd *pd);
+	struct ib_ah *(*create_ah)(struct ib_pd *pd,
+				   struct rdma_ah_attr *ah_attr,
+				   struct ib_udata *udata);
+	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	int (*destroy_ah)(struct ib_ah *ah);
+	struct ib_srq *(*create_srq)(struct ib_pd *pd,
+				     struct ib_srq_init_attr *srq_init_attr,
+				     struct ib_udata *udata);
+	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+			  enum ib_srq_attr_mask srq_attr_mask,
+			  struct ib_udata *udata);
+	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+	int (*destroy_srq)(struct ib_srq *srq);
+	struct ib_qp *(*create_qp)(struct ib_pd *pd,
+				   struct ib_qp_init_attr *qp_init_attr,
+				   struct ib_udata *udata);
+	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_udata *udata);
+	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+	int (*destroy_qp)(struct ib_qp *qp);
+	struct ib_cq *(*create_cq)(struct ib_device *device,
+				   const struct ib_cq_init_attr *attr,
+				   struct ib_ucontext *context,
+				   struct ib_udata *udata);
+	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+	int (*destroy_cq)(struct ib_cq *cq);
+	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+				     u64 virt_addr, int mr_access_flags,
+				     struct ib_udata *udata);
+	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+			     u64 virt_addr, int mr_access_flags,
+			     struct ib_pd *pd, struct ib_udata *udata);
+	int (*dereg_mr)(struct ib_mr *mr);
+	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+				  u32 max_num_sg);
+	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+			 unsigned int *sg_offset);
+	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+			       struct ib_mr_status *mr_status);
+	struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
+				  struct ib_udata *udata);
+	int (*dealloc_mw)(struct ib_mw *mw);
+	struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
+				    struct ib_fmr_attr *fmr_attr);
+	int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
+			    u64 iova);
+	int (*unmap_fmr)(struct list_head *fmr_list);
+	int (*dealloc_fmr)(struct ib_fmr *fmr);
+	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
+				      struct ib_ucontext *ucontext,
+				      struct ib_udata *udata);
+	int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+	struct ib_flow *(*create_flow)(struct ib_qp *qp,
+				       struct ib_flow_attr *flow_attr,
+				       int domain, struct ib_udata *udata);
+	int (*destroy_flow)(struct ib_flow *flow_id);
+	struct ib_flow_action *(*create_flow_action_esp)(
+		struct ib_device *device,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*destroy_flow_action)(struct ib_flow_action *action);
+	int (*modify_flow_action_esp)(
+		struct ib_flow_action *action,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+				 int state);
+	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+			     struct ifla_vf_info *ivf);
+	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+			    struct ifla_vf_stats *stats);
+	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+			   int type);
+	struct ib_wq *(*create_wq)(struct ib_pd *pd,
+				   struct ib_wq_init_attr *init_attr,
+				   struct ib_udata *udata);
+	int (*destroy_wq)(struct ib_wq *wq);
+	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+			 u32 wq_attr_mask, struct ib_udata *udata);
+	struct ib_rwq_ind_table *(*create_rwq_ind_table)(
+		struct ib_device *device,
+		struct ib_rwq_ind_table_init_attr *init_attr,
+		struct ib_udata *udata);
+	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+	struct ib_dm *(*alloc_dm)(struct ib_device *device,
+				  struct ib_ucontext *context,
+				  struct ib_dm_alloc_attr *attr,
+				  struct uverbs_attr_bundle *attrs);
+	int (*dealloc_dm)(struct ib_dm *dm);
+	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+				   struct ib_dm_mr_attr *attr,
+				   struct uverbs_attr_bundle *attrs);
+	struct ib_counters *(*create_counters)(
+		struct ib_device *device, struct uverbs_attr_bundle *attrs);
+	int (*destroy_counters)(struct ib_counters *counters);
+	int (*read_counters)(struct ib_counters *counters,
+			     struct ib_counters_read_attr *counters_read_attr,
+			     struct uverbs_attr_bundle *attrs);
+	/**
+	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
+	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
+	 *   core when the device is removed.  A lifespan of -1 in the return
+	 *   struct tells the core to set a default lifespan.
+	 */
+	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+						u8 port_num);
+	/**
+	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
+	 * @index - The index in the value array we wish to have updated, or
+	 *   num_counters if we want all stats updated
+	 * Return codes -
+	 *   < 0 - Error, no counters updated
+	 *   index - Updated the single counter pointed to by index
+	 *   num_counters - Updated all counters (will reset the timestamp
+	 *     and prevent further calls for lifespan milliseconds)
+	 * Drivers are allowed to update all counters in lieu of just the
+	 *   one given in index at their option
+	 */
+	int (*get_hw_stats)(struct ib_device *device,
+			    struct rdma_hw_stats *stats, u8 port, int index);
+};
+
 struct ib_device {
 	/* Do not access @dma_device directly from ULP nor from HW drivers. */
 	struct device                *dma_device;
@@ -2660,6 +2903,8 @@  void ib_unregister_client(struct ib_client *client);
 void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
 void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			 void *data);
+void ib_set_device_ops(struct ib_device *device,
+		       const struct ib_device_ops *ops);
 
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,